2222
2323//! This library provides an implementation of the parser combinators.
2424//!
25- //! An implementation of the parser should follow this interface:
26- //! ```zig
27- //! struct {
28- //! const Self = @This();
29- //!
30- //! /// The type of the result when parsing is successful
31- //! pub const ResultType: type;
32- //!
33- //! /// Should get bytes from the reader and puts it to the buffer if they were
34- //! /// not successfully parsed, or return the result of parsing.
35- //! fn parse(self: Self, cursor: *Cursor) anyerror!?ResultType
36- //! }
37- //! ```
3825//! Three different types of parser implementations exist:
3926//! 1. The inner parser implementations, which contain the logic for parsing the input.
4027//! 2. The public wrapper `ParserCombinator`, which provides methods to combine parsers and create new ones.
@@ -208,7 +195,39 @@ pub inline fn tuple(parsers: anytype) ParserCombinator(Tuple(@TypeOf(parsers)))
208195}
209196
210197/// Creates a parser that invokes the function `f` to create a tagged parser, which will be used
211- /// to parse the input. That tagged parser will be deinited at the end of parsing.
198+ /// to parse the input. That tagged parser will be deinited at the end of parsing if the destructor is provided.
199+ /// ```zig
200+ /// test {
201+ /// var result = std.ArrayList(u8).init(std.testing.allocator);
202+ /// defer result.deinit();
203+ /// // Grammar:
204+ /// // List <- Cons | Nil
205+ /// // Cons <- '(' Int List ')'
206+ /// // Nil <- "Nil"
207+ /// const parser = try struct {
208+ /// // this parser accumulates the numbers from an input to the list in reverse order
209+ /// // for simplicity of the example
210+ /// fn reversedList(accumulator: *std.ArrayList(u8)) !TaggedParser(void) {
211+ /// const nil = word("Nil");
212+ /// const cons = tuple(.{ char('('), int(u8), lazy(void, accumulator, reversedList), char(')') });
213+ /// const list = cons.orElse(nil);
214+ /// var parser = list.transform(void, accumulator, struct {
215+ /// fn append(acc: *std.ArrayList(u8), value: @TypeOf(list).ResultType) !void {
216+ /// switch (value) {
217+ /// .right => {},
218+ /// .left => |cns| try acc.append(cns[1]),
219+ /// }
220+ /// }
221+ /// }.append);
222+ /// return parser.taggedAllocated(accumulator.allocator);
223+ /// }
224+ /// }.reversedList(&result);
225+ /// defer parser.deinit();
226+ /// //
227+ /// std.debug.assert(try parser.parseString(std.testing.allocator, "(1(2(3Nil)))") != null);
228+ /// try std.testing.expectEqualSlices(u8, &.{ 3, 2, 1 }, result.items);
229+ /// }
230+ /// ```
212231pub inline fn lazy (
213232 comptime ResultType : type ,
214233 context : anytype ,
@@ -228,18 +247,23 @@ pub fn TaggedParser(comptime TaggedType: type) type {
228247
229248 const Self = @This ();
230249
231- alloc : std.mem.Allocator ,
/// Bundles an allocator with the callback that releases the underlying parser.
const Destructor = struct {
    /// Allocator passed to `deinitFn` when the parser is released.
    alloc: std.mem.Allocator,
    /// Callback that receives `alloc` and the type-erased pointer to the parser.
    deinitFn: *const fn (allocator: std.mem.Allocator, ptr: *const anyopaque) void,
};
254+
232255 underlying : * const anyopaque ,
233256 parseFn : * const fn (parser : * const anyopaque , cursor : * Cursor ) anyerror ! ? ResultType ,
234- deinitFn : * const fn ( alloc : std.mem.Allocator , underlying : * const anyopaque ) void ,
257+ destructor : ? Destructor = null ,
235258
/// Forwards parsing to the type-erased parser through `parseFn`.
/// Returns whatever the underlying parser returns: a result, `null`, or an error.
inline fn parse(self: Self, cursor: *Cursor) anyerror!?ResultType {
    return self.parseFn(self.underlying, cursor);
}
239262
/// Releases the underlying parser through the stored destructor.
/// Does nothing when no destructor is set.
pub inline fn deinit(self: Self) void {
    const ds = self.destructor orelse return;
    ds.deinitFn(ds.alloc, self.underlying);
}
244268
245269 /// This method is similar to the same method in `ParserCombinator`.
@@ -277,24 +301,37 @@ pub fn ParserCombinator(comptime Implementation: type) type {
277301 /// The underlying implementation of the parser
278302 implementation : Implementation ,
279303
/// Prints the wrapped implementation with the `{any}` specifier.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.print("{any}", .{self.implementation});
}
283307
284- /// Allocates memory for underlying implementation by the `alloc`,
/// Erases the concrete type of this parser by storing a pointer to it in a `TaggedParser`.
/// Nothing is allocated and no destructor is attached, so the result only borrows `self`:
/// the parser pointed to must outlive the returned value. When that cannot be guaranteed,
/// prefer `taggedAllocated`.
pub fn tagged(self: *Self) !TaggedParser(ResultType) {
    const Erased = struct {
        /// Restores the concrete parser type from the opaque pointer and runs it.
        fn parse(opaque_ptr: *const anyopaque, cursor: *Cursor) anyerror!?ResultType {
            const typed: *const Self = @ptrCast(@alignCast(opaque_ptr));
            return typed.parse(cursor);
        }
    };
    return TaggedParser(ResultType){
        .underlying = self,
        .parseFn = Erased.parse,
    };
}
320+
321+ /// Allocates memory for underlying implementation using `alloc`
285322 /// and copies underlying parser to that memory. It makes possible to erase the type of the
286323 /// underlying parser. The `deinit` method of the returned TaggedParser should be invoked
287324 /// to free allocated memory.
288325 /// Example:
289326 /// ```zig
290327 /// test {
291328 /// const p = char('a');
292- /// const tg: TaggedParser(u8) = try p.tagged (std.testing.allocator);
329+ /// const tg: TaggedParser(u8) = try p.taggedAllocated (std.testing.allocator);
293330 /// defer tg.deinit();
294331 /// try std.testing.expectEqual('a', try tg.parseString(std.testing.allocator, "a"));
295332 /// }
296333 /// ```
297- pub fn tagged (self : Self , alloc : std.mem.Allocator ) ! TaggedParser (ResultType ) {
334+ pub fn taggedAllocated (self : Self , alloc : std.mem.Allocator ) ! TaggedParser (ResultType ) {
298335 const fns = struct {
299336 fn parse (ptr : * const anyopaque , cursor : * Cursor ) anyerror ! ? ResultType {
300337 const implementation : * const Implementation = @ptrCast (@alignCast (ptr ));
@@ -308,10 +345,12 @@ pub fn ParserCombinator(comptime Implementation: type) type {
308345 const on_heap = try alloc .create (Implementation );
309346 on_heap .* = self .implementation ;
310347 return .{
311- .alloc = alloc ,
312348 .underlying = on_heap ,
313349 .parseFn = fns .parse ,
314- .deinitFn = fns .deinit ,
350+ .destructor = .{
351+ .alloc = alloc ,
352+ .deinitFn = fns .deinit ,
353+ },
315354 };
316355 }
317356
@@ -607,7 +646,7 @@ const Cursor = struct {
607646 self .buffer .deinit ();
608647 }
609648
610- pub fn format (self : Cursor , comptime _ : []const u8 , _ : std.fmt.FormatOptions , writer : anytype ) ! void {
649+ fn format (self : Cursor , comptime _ : []const u8 , _ : std.fmt.FormatOptions , writer : anytype ) ! void {
611650 if (self .idx < self .buffer .items .len ) {
612651 const left_bound = if (self .idx == 0 ) 0 else @min (self .idx - 1 , self .buffer .items .len );
613652 const right_bound = @min (self .idx + 1 , self .buffer .items .len );
@@ -685,7 +724,7 @@ const AnyChar = struct {
685724 }
686725 }
687726
/// Writes the fixed label `<any char>` that identifies this parser.
fn format(_: AnyChar, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.writeAll("<any char>");
}
691730};
@@ -709,7 +748,7 @@ fn Conditional(comptime Label: []const u8, Underlying: type, Context: type) type
709748 return null ;
710749 }
711750
/// Writes the comptime `Label` of this parser wrapped in angle brackets.
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    const text = comptime std.fmt.comptimePrint("<{s}>", .{Label});
    return writer.writeAll(text);
}
715754 };
@@ -732,7 +771,7 @@ fn Const(comptime Underlying: type, comptime template: Underlying.ResultType) ty
732771 return null ;
733772 }
734773
/// Writes `<Constant …>` with the comptime `template` value rendered via `{any}`.
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    const text = comptime std.fmt.comptimePrint("<Constant {any}>", .{template});
    return writer.writeAll(text);
}
738777 };
@@ -759,7 +798,7 @@ fn Slice(comptime Underlying: type) type {
759798 return try self .alloc .realloc (buffer , i );
760799 }
761800
/// Writes `<Slice of …>` followed by the description of the underlying parser.
///
/// The underlying parser is a struct value, so it is printed with `{any}` like the
/// other combinators do: `{s}` is only accepted for a struct when a custom `format`
/// method is visible to `std.fmt`, whereas `{any}` works either way.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    try writer.print("<Slice of {any}>", .{self.underlying});
}
765804
@@ -791,7 +830,7 @@ fn Array(comptime Underlying: type, count: u8) type {
791830 return result ;
792831 }
793832
/// Writes `<Array of …>` followed by the description of the underlying parser.
///
/// The underlying parser is a struct value, so it is printed with `{any}` like the
/// other combinators do: `{s}` is only accepted for a struct when a custom `format`
/// method is visible to `std.fmt`, whereas `{any}` works either way.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    try writer.print("<Array of {any}>", .{self.underlying});
}
797836 };
@@ -825,7 +864,7 @@ fn SentinelArray(comptime Underlying: type, min_count: u8, max_count: u8) type {
825864 return result ;
826865 }
827866
/// Writes `<SentinelArray of …>` followed by the description of the underlying parser.
///
/// The underlying parser is a struct value, so it is printed with `{any}` like the
/// other combinators do: `{s}` is only accepted for a struct when a custom `format`
/// method is visible to `std.fmt`, whereas `{any}` works either way.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    try writer.print("<SentinelArray of {any}>", .{self.underlying});
}
831870 };
@@ -848,7 +887,7 @@ fn Collect(comptime Underlying: type, Collector: type) type {
848887 return self .collector ;
849888 }
850889
/// Writes `<Collect {underlying parser} to {collector type name}>`.
///
/// The arguments follow the wording of the message: first the parser whose results
/// are collected, then the name of the `Collector` type. The type name is a string,
/// so it is printed with `{s}` rather than `{any}`.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    try writer.print("<Collect {any} to {s}>", .{ self.underlying, @typeName(Collector) });
}
854893 };
@@ -878,7 +917,7 @@ fn AndThen(comptime UnderlyingLeft: type, UnderlyingRight: type) type {
878917 }
879918 }
880919
/// Writes both operands of the combinator joined by the word `andThen`.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.print("<{any} andThen {any}>", .{ self.left, self.right });
}
884923 };
@@ -899,7 +938,7 @@ fn LeftThen(comptime UnderlyingLeft: type, UnderlyingRight: type) type {
899938 return null ;
900939 }
901940
/// Writes both operands of the combinator joined by the word `leftThen`.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.print("<{any} leftThen {any}>", .{ self.left, self.right });
}
905944 };
@@ -920,7 +959,7 @@ fn RightThen(comptime UnderlyingLeft: type, UnderlyingRight: type) type {
920959 return null ;
921960 }
922961
/// Writes both operands of the combinator joined by the word `rightThen`.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.print("<{any} rightThen {any}>", .{ self.left, self.right });
}
926965 };
@@ -948,7 +987,7 @@ fn OrElse(comptime UnderlyingLeft: type, UnderlyingRight: type) type {
948987 return null ;
949988 }
950989
/// Writes both alternatives of the combinator joined by the word `orElse`.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.print("<{any} orElse {any}>", .{ self.left, self.right });
}
954993 };
@@ -1004,7 +1043,7 @@ fn Tuple(comptime Underlyings: type) type {
10041043 return result ;
10051044 }
10061045
/// Writes `<Tuple of …>` followed by the stored `parsers` rendered via `{any}`.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.print("<Tuple of {any}>", .{self.parsers});
}
10101049 };
@@ -1045,7 +1084,7 @@ fn OneCharOf(comptime chars: []const u8) type {
10451084 return std .math .order (lhs , rhs );
10461085 }
10471086
/// Writes a label that quotes the comptime set of accepted characters `chars`.
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.print("<One char of \" {s}\" >", .{chars});
}
10511090 };
@@ -1070,7 +1109,7 @@ fn Transform(comptime UnderlyingA: type, Context: type, B: type) type {
10701109 return null ;
10711110 }
10721111
/// Writes `<Transform result of the {underlying parser} to {target type name}>`.
///
/// `@typeName(B)` yields a string, so it is printed with `{s}`; the underlying parser
/// is a struct value and keeps `{any}`.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    try writer.print("<Transform result of the {any} to {s}>", .{ self.underlying, @typeName(B) });
}
10761115 };
@@ -1086,7 +1125,12 @@ fn Int(comptime T: type, max_buf_size: usize) type {
10861125 const orig_idx = cursor .idx ;
10871126 var buf : [max_buf_size ]u8 = undefined ;
10881127 var idx : usize = 0 ;
1089- const symbols = OneCharOf ("+-0123456789_boXABCDF" ){};
1128+ const sign = OneCharOf ("+-" ){};
1129+ if (try sign .parse (cursor )) | sg | {
1130+ buf [0 ] = sg ;
1131+ idx += 1 ;
1132+ }
1133+ const symbols = OneCharOf ("0123456789_boXABCDF" ){};
10901134 while (try symbols .parse (cursor )) | s | : (idx += 1 ) {
10911135 buf [idx ] = s ;
10921136 }
@@ -1096,7 +1140,7 @@ fn Int(comptime T: type, max_buf_size: usize) type {
10961140 };
10971141 }
10981142
/// Writes the fixed label `<Integer>` that identifies this parser.
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.writeAll("<Integer>");
}
11021146 };
@@ -1116,7 +1160,7 @@ fn Lazy(comptime Context: type, Type: type) type {
11161160 return try parser .parse (cursor );
11171161 }
11181162
/// Writes `<Lazy …>` followed by the stored `buildParserFn` rendered via `{any}`.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    return writer.print("<Lazy {any}>", .{self.buildParserFn});
}
11221166 };
@@ -1146,7 +1190,7 @@ fn Logged(comptime Underlying: type, scope: @Type(.EnumLiteral)) type {
11461190 }
11471191 }
11481192
/// Writes the name of the logging `scope` followed by the underlying parser.
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    const scope_name = @tagName(scope);
    return writer.print("<Logged with scope {s} {any}>", .{ scope_name, self.underlying });
}
11521196 };
@@ -1170,7 +1214,7 @@ const End = struct {
11701214 }
11711215 }
11721216
/// Writes the fixed label `<End of input>` that identifies this parser.
///
/// The receiver is the enclosing parser type. It was previously declared as `AnyChar`,
/// which made the method unusable on values of this struct.
/// NOTE(review): `std.fmt` can only use declarations that are visible to it — confirm
/// that a non-`pub` `format` is still picked up when this parser is printed.
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
    try writer.writeAll("<End of input>");
}
11761220};
@@ -1261,7 +1305,7 @@ test "Parser tuple example" {
12611305
12621306test "tagged example" {
12631307 const p = char ('a' );
1264- const tg : TaggedParser (u8 ) = try p .tagged (std .testing .allocator );
1308+ const tg : TaggedParser (u8 ) = try p .taggedAllocated (std .testing .allocator );
12651309 defer tg .deinit ();
12661310
12671311 try std .testing .expectEqual ('a' , try tg .parseString (std .testing .allocator , "a" ));
@@ -1375,4 +1419,33 @@ test "transform example" {
13751419 try std .testing .expectEqual (42 , try p .parseString (std .testing .allocator , "42" ));
13761420}
13771421
1378- // TODO: provide an example of using LazyParser
test "lazy example" {
    // Grammar:
    //   List <- Cons | Nil
    //   Cons <- '(' Int List ')'
    //   Nil  <- "Nil"
    var result = std.ArrayList(u8).init(std.testing.allocator);
    defer result.deinit();

    const parser = try struct {
        // Builds the `List` parser. For simplicity of the example, the numbers from the
        // input are accumulated in reverse order.
        fn reversedList(accumulator: *std.ArrayList(u8)) !TaggedParser(void) {
            const nil = word("Nil");
            // `lazy` refers back to this very function, which is what makes the grammar recursive.
            const cons = tuple(.{ char('('), int(u8), lazy(void, accumulator, reversedList), char(')') });
            const list = cons.orElse(nil);
            const collecting = list.transform(void, accumulator, struct {
                fn append(acc: *std.ArrayList(u8), value: @TypeOf(list).ResultType) !void {
                    switch (value) {
                        // `Cons` matched: element 1 of the tuple is the parsed integer.
                        .left => |cns| try acc.append(cns[1]),
                        // `Nil` matched: nothing to accumulate.
                        .right => {},
                    }
                }
            }.append);
            return collecting.taggedAllocated(accumulator.allocator);
        }
    }.reversedList(&result);
    defer parser.deinit();

    // `expect` reports a failed check as a test failure, unlike `std.debug.assert`.
    try std.testing.expect(try parser.parseString(std.testing.allocator, "(1(2(3Nil)))") != null);
    try std.testing.expectEqualSlices(u8, &.{ 3, 2, 1 }, result.items);
}
0 commit comments