Skip to content

Commit f7f49b5

Browse files
Add example for lazy parser
.
1 parent d18df42 commit f7f49b5

3 files changed

Lines changed: 124 additions & 48 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
[![parcom ci](https://github.com/dokwork/parcom/actions/workflows/ci.yml/badge.svg)](https://github.com/dokwork/parcom/actions/workflows/ci.yml)
44

55
> [!WARNING]
6+
> This library is still under development. The API is not stable.
7+
8+
> [!NOTE]
69
> Required version of zig is 0.13.0
710
811
Parser combinators for Zig.

examples/expression.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ fn expr(alloc: std.mem.Allocator) !p.TaggedParser(Value) {
103103
.transform(Value, alloc, fns.calculate);
104104
};
105105

106-
return try sum.tagged(alloc);
106+
return try sum.taggedAllocated(alloc);
107107
}
108108

109109
fn evaluate(alloc: std.mem.Allocator, expression: []const u8) !?Value {

src/parcom.zig

Lines changed: 120 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,6 @@
2222

2323
//! This library provides an implementation of the parser combinators.
2424
//!
25-
//! An implementation of the parser should follow this interface:
26-
//! ```zig
27-
//! struct {
28-
//! const Self = @This();
29-
//!
30-
//! /// The type of the result when parsing is successful
31-
//! pub const ResultType: type;
32-
//!
33-
//! /// Should get bytes from the reader and puts it to the buffer if they were
34-
//! /// not successfully parsed, or return the result of parsing.
35-
//! fn parse(self: Self, cursor: *Cursor) anyerror!?ResultType
36-
//! }
37-
//! ```
3825
//! Three different types of parser implementations exist:
3926
//! 1. The inner parser implementations, which contain the logic for parsing the input.
4027
//! 2. The public wrapper `ParserCombinator`, which provides methods to combine parsers and create new ones.
@@ -208,7 +195,39 @@ pub inline fn tuple(parsers: anytype) ParserCombinator(Tuple(@TypeOf(parsers)))
208195
}
209196

210197
/// Creates a parser that invokes the function `f` to create a tagged parser, which will be used
211-
/// to parse the input. That tagged parser will be deinited at the end of parsing.
198+
/// to parse the input. That tagged parser will be deinitialized at the end of parsing if it has a destructor.
199+
/// ```zig
200+
/// test {
201+
/// var result = std.ArrayList(u8).init(std.testing.allocator);
202+
/// defer result.deinit();
203+
/// // Grammar:
204+
/// // List <- Cons | Nil
205+
/// // Cons <- '(' Int List ')'
206+
/// // Nil <- "Nil"
207+
/// const parser = try struct {
208+
/// // this parser accumulates the numbers from an input to the list in reverse order
209+
/// // for simplicity of the example
210+
/// fn reversedList(accumulator: *std.ArrayList(u8)) !TaggedParser(void) {
211+
/// const nil = word("Nil");
212+
/// const cons = tuple(.{ char('('), int(u8), lazy(void, accumulator, reversedList), char(')') });
213+
/// const list = cons.orElse(nil);
214+
/// var parser = list.transform(void, accumulator, struct {
215+
/// fn append(acc: *std.ArrayList(u8), value: @TypeOf(list).ResultType) !void {
216+
/// switch (value) {
217+
/// .right => {},
218+
/// .left => |cns| try acc.append(cns[1]),
219+
/// }
220+
/// }
221+
/// }.append);
222+
/// return parser.taggedAllocated(accumulator.allocator);
223+
/// }
224+
/// }.reversedList(&result);
225+
/// defer parser.deinit();
226+
/// //
227+
/// std.debug.assert(try parser.parseString(std.testing.allocator, "(1(2(3Nil))))") != null);
228+
/// try std.testing.expectEqualSlices(u8, &.{ 3, 2, 1 }, result.items);
229+
/// }
230+
/// ```
212231
pub inline fn lazy(
213232
comptime ResultType: type,
214233
context: anytype,
@@ -228,18 +247,23 @@ pub fn TaggedParser(comptime TaggedType: type) type {
228247

229248
const Self = @This();
230249

231-
alloc: std.mem.Allocator,
250+
const Destructor = struct {
251+
alloc: std.mem.Allocator,
252+
deinitFn: *const fn (alloc: std.mem.Allocator, underlying: *const anyopaque) void,
253+
};
254+
232255
underlying: *const anyopaque,
233256
parseFn: *const fn (parser: *const anyopaque, cursor: *Cursor) anyerror!?ResultType,
234-
deinitFn: *const fn (alloc: std.mem.Allocator, underlying: *const anyopaque) void,
257+
destructor: ?Destructor = null,
235258

236259
inline fn parse(self: Self, cursor: *Cursor) anyerror!?ResultType {
237260
return try self.parseFn(self.underlying, cursor);
238261
}
239262

240-
/// Deallocates memory with underlying parser.
241-
pub fn deinit(self: Self) void {
242-
self.deinitFn(self.alloc, self.underlying);
263+
/// Deallocates the memory holding the underlying parser if it was allocated on the heap.
264+
pub inline fn deinit(self: Self) void {
265+
if (self.destructor) |ds|
266+
ds.deinitFn(ds.alloc, self.underlying);
243267
}
244268

245269
/// This method is similar to the same method in `ParserCombinator`.
@@ -277,24 +301,37 @@ pub fn ParserCombinator(comptime Implementation: type) type {
277301
/// The underlying implementation of the parser
278302
implementation: Implementation,
279303

280-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
304+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
281305
try writer.print("{any}", .{self.implementation});
282306
}
283307

284-
/// Allocates memory for underlying implementation by the `alloc`,
308+
/// Wraps the self parser into a tagged version, allowing the type of the underlying parser
309+
/// to be erased. Be cautious with the lifetime of the self parser. In most cases, the
310+
/// `taggedAllocated` method is safer.
311+
pub fn tagged(self: *Self) !TaggedParser(ResultType) {
312+
const fns = struct {
313+
fn parse(ptr: *const anyopaque, cursor: *Cursor) anyerror!?ResultType {
314+
const s: *const Self = @ptrCast(@alignCast(ptr));
315+
return try s.parse(cursor);
316+
}
317+
};
318+
return .{ .underlying = self, .parseFn = fns.parse };
319+
}
320+
321+
/// Allocates memory for underlying implementation using `alloc`
285322
/// and copies the underlying parser to that memory. This makes it possible to erase the type of the
286323
/// underlying parser. The `deinit` method of the returned TaggedParser should be invoked
287324
/// to free allocated memory.
288325
/// Example:
289326
/// ```zig
290327
/// test {
291328
/// const p = char('a');
292-
/// const tg: TaggedParser(u8) = try p.tagged(std.testing.allocator);
329+
/// const tg: TaggedParser(u8) = try p.taggedAllocated(std.testing.allocator);
293330
/// defer tg.deinit();
294331
/// try std.testing.expectEqual('a', try tg.parseString(std.testing.allocator, "a"));
295332
/// }
296333
/// ```
297-
pub fn tagged(self: Self, alloc: std.mem.Allocator) !TaggedParser(ResultType) {
334+
pub fn taggedAllocated(self: Self, alloc: std.mem.Allocator) !TaggedParser(ResultType) {
298335
const fns = struct {
299336
fn parse(ptr: *const anyopaque, cursor: *Cursor) anyerror!?ResultType {
300337
const implementation: *const Implementation = @ptrCast(@alignCast(ptr));
@@ -308,10 +345,12 @@ pub fn ParserCombinator(comptime Implementation: type) type {
308345
const on_heap = try alloc.create(Implementation);
309346
on_heap.* = self.implementation;
310347
return .{
311-
.alloc = alloc,
312348
.underlying = on_heap,
313349
.parseFn = fns.parse,
314-
.deinitFn = fns.deinit,
350+
.destructor = .{
351+
.alloc = alloc,
352+
.deinitFn = fns.deinit,
353+
},
315354
};
316355
}
317356

@@ -607,7 +646,7 @@ const Cursor = struct {
607646
self.buffer.deinit();
608647
}
609648

610-
pub fn format(self: Cursor, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
649+
fn format(self: Cursor, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
611650
if (self.idx < self.buffer.items.len) {
612651
const left_bound = if (self.idx == 0) 0 else @min(self.idx - 1, self.buffer.items.len);
613652
const right_bound = @min(self.idx + 1, self.buffer.items.len);
@@ -685,7 +724,7 @@ const AnyChar = struct {
685724
}
686725
}
687726

688-
pub fn format(_: AnyChar, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
727+
fn format(_: AnyChar, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
689728
try writer.writeAll("<any char>");
690729
}
691730
};
@@ -709,7 +748,7 @@ fn Conditional(comptime Label: []const u8, Underlying: type, Context: type) type
709748
return null;
710749
}
711750

712-
pub fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
751+
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
713752
try writer.writeAll(std.fmt.comptimePrint("<{s}>", .{Label}));
714753
}
715754
};
@@ -732,7 +771,7 @@ fn Const(comptime Underlying: type, comptime template: Underlying.ResultType) ty
732771
return null;
733772
}
734773

735-
pub fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
774+
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
736775
try writer.writeAll(std.fmt.comptimePrint("<Constant {any}>", .{template}));
737776
}
738777
};
@@ -759,7 +798,7 @@ fn Slice(comptime Underlying: type) type {
759798
return try self.alloc.realloc(buffer, i);
760799
}
761800

762-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
801+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
763802
try writer.print("<Slice of {s}>", .{self.underlying});
764803
}
765804

@@ -791,7 +830,7 @@ fn Array(comptime Underlying: type, count: u8) type {
791830
return result;
792831
}
793832

794-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
833+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
795834
try writer.print("<Array of {s}>", .{self.underlying});
796835
}
797836
};
@@ -825,7 +864,7 @@ fn SentinelArray(comptime Underlying: type, min_count: u8, max_count: u8) type {
825864
return result;
826865
}
827866

828-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
867+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
829868
try writer.print("<SentinelArray of {s}>", .{self.underlying});
830869
}
831870
};
@@ -848,7 +887,7 @@ fn Collect(comptime Underlying: type, Collector: type) type {
848887
return self.collector;
849888
}
850889

851-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
890+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
852891
try writer.print("<Collect {any} to {any}>", .{ @typeName(Collector), self.underlying });
853892
}
854893
};
@@ -878,7 +917,7 @@ fn AndThen(comptime UnderlyingLeft: type, UnderlyingRight: type) type {
878917
}
879918
}
880919

881-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
920+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
882921
try writer.print("<{any} andThen {any}>", .{ self.left, self.right });
883922
}
884923
};
@@ -899,7 +938,7 @@ fn LeftThen(comptime UnderlyingLeft: type, UnderlyingRight: type) type {
899938
return null;
900939
}
901940

902-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
941+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
903942
try writer.print("<{any} leftThen {any}>", .{ self.left, self.right });
904943
}
905944
};
@@ -920,7 +959,7 @@ fn RightThen(comptime UnderlyingLeft: type, UnderlyingRight: type) type {
920959
return null;
921960
}
922961

923-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
962+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
924963
try writer.print("<{any} rightThen {any}>", .{ self.left, self.right });
925964
}
926965
};
@@ -948,7 +987,7 @@ fn OrElse(comptime UnderlyingLeft: type, UnderlyingRight: type) type {
948987
return null;
949988
}
950989

951-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
990+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
952991
try writer.print("<{any} orElse {any}>", .{ self.left, self.right });
953992
}
954993
};
@@ -1004,7 +1043,7 @@ fn Tuple(comptime Underlyings: type) type {
10041043
return result;
10051044
}
10061045

1007-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
1046+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
10081047
try writer.print("<Tuple of {any}>", .{self.parsers});
10091048
}
10101049
};
@@ -1045,7 +1084,7 @@ fn OneCharOf(comptime chars: []const u8) type {
10451084
return std.math.order(lhs, rhs);
10461085
}
10471086

1048-
pub fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
1087+
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
10491088
try writer.print("<One char of \"{s}\">", .{chars});
10501089
}
10511090
};
@@ -1070,7 +1109,7 @@ fn Transform(comptime UnderlyingA: type, Context: type, B: type) type {
10701109
return null;
10711110
}
10721111

1073-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
1112+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
10741113
try writer.print("<Transform result of the {any} to {any}>", .{ self.underlying, @typeName(B) });
10751114
}
10761115
};
@@ -1086,7 +1125,12 @@ fn Int(comptime T: type, max_buf_size: usize) type {
10861125
const orig_idx = cursor.idx;
10871126
var buf: [max_buf_size]u8 = undefined;
10881127
var idx: usize = 0;
1089-
const symbols = OneCharOf("+-0123456789_boXABCDF"){};
1128+
const sign = OneCharOf("+-"){};
1129+
if (try sign.parse(cursor)) |sg| {
1130+
buf[0] = sg;
1131+
idx += 1;
1132+
}
1133+
const symbols = OneCharOf("0123456789_boXABCDF"){};
10901134
while (try symbols.parse(cursor)) |s| : (idx += 1) {
10911135
buf[idx] = s;
10921136
}
@@ -1096,7 +1140,7 @@ fn Int(comptime T: type, max_buf_size: usize) type {
10961140
};
10971141
}
10981142

1099-
pub fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
1143+
fn format(_: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
11001144
try writer.writeAll("<Integer>");
11011145
}
11021146
};
@@ -1116,7 +1160,7 @@ fn Lazy(comptime Context: type, Type: type) type {
11161160
return try parser.parse(cursor);
11171161
}
11181162

1119-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
1163+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
11201164
try writer.print("<Lazy {any}>", .{self.buildParserFn});
11211165
}
11221166
};
@@ -1146,7 +1190,7 @@ fn Logged(comptime Underlying: type, scope: @Type(.EnumLiteral)) type {
11461190
}
11471191
}
11481192

1149-
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
1193+
fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
11501194
try writer.print("<Logged with scope {s} {any}>", .{ @tagName(scope), self.underlying });
11511195
}
11521196
};
@@ -1170,7 +1214,7 @@ const End = struct {
11701214
}
11711215
}
11721216

1173-
pub fn format(_: AnyChar, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
1217+
fn format(_: AnyChar, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
11741218
try writer.writeAll("<End of input>");
11751219
}
11761220
};
@@ -1261,7 +1305,7 @@ test "Parser tuple example" {
12611305

12621306
test "tagged example" {
12631307
const p = char('a');
1264-
const tg: TaggedParser(u8) = try p.tagged(std.testing.allocator);
1308+
const tg: TaggedParser(u8) = try p.taggedAllocated(std.testing.allocator);
12651309
defer tg.deinit();
12661310

12671311
try std.testing.expectEqual('a', try tg.parseString(std.testing.allocator, "a"));
@@ -1375,4 +1419,33 @@ test "transform example" {
13751419
try std.testing.expectEqual(42, try p.parseString(std.testing.allocator, "42"));
13761420
}
13771421

1378-
// TODO: provide an example of using LazyParser
1422+
test "lazy example" {
1423+
var result = std.ArrayList(u8).init(std.testing.allocator);
1424+
defer result.deinit();
1425+
// Grammar:
1426+
// List <- Cons | Nil
1427+
// Cons <- '(' Int List ')'
1428+
// Nil <- "Nil"
1429+
const parser = try struct {
1430+
// this parser accumulates the numbers from an input to the list in reverse order
1431+
// for simplicity of the example
1432+
fn reversedList(accumulator: *std.ArrayList(u8)) !TaggedParser(void) {
1433+
const nil = word("Nil");
1434+
const cons = tuple(.{ char('('), int(u8), lazy(void, accumulator, reversedList), char(')') });
1435+
const list = cons.orElse(nil);
1436+
var parser = list.transform(void, accumulator, struct {
1437+
fn append(acc: *std.ArrayList(u8), value: @TypeOf(list).ResultType) !void {
1438+
switch (value) {
1439+
.right => {},
1440+
.left => |cns| try acc.append(cns[1]),
1441+
}
1442+
}
1443+
}.append);
1444+
return parser.taggedAllocated(accumulator.allocator);
1445+
}
1446+
}.reversedList(&result);
1447+
defer parser.deinit();
1448+
1449+
std.debug.assert(try parser.parseString(std.testing.allocator, "(1(2(3Nil))))") != null);
1450+
try std.testing.expectEqualSlices(u8, &.{ 3, 2, 1 }, result.items);
1451+
}

0 commit comments

Comments
 (0)