jonnyfiveisonline
diff --git a/‎dune-project‎
Lines changed: 3 additions & 3 deletions b/‎dune-project‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎odoc-parser.opam‎
Lines changed: 2 additions & 0 deletions b/‎odoc-parser.opam‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/parser/TODO.md‎
Lines changed: 46 additions & 0 deletions b/‎src/parser/TODO.md‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎src/parser/ast.ml‎
Lines changed: 15 additions & 5 deletions b/‎src/parser/ast.ml‎
Lines changed: 15 additions & 5 deletions
diff --git a/‎src/parser/dune‎
Lines changed: 5 additions & 1 deletion b/‎src/parser/dune‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎src/parser/lexer.mli‎
Lines changed: 2 additions & 4 deletions b/‎src/parser/lexer.mli‎
Lines changed: 2 additions & 4 deletions
@@ -11,7 +11,7 @@
 
 (authors
  "Anton Bachin <antonbachin@yahoo.com>"
- "Daniel B\195\188nzli <daniel.buenzli@erratique.ch>"
+ "Daniel Bünzli <daniel.buenzli@erratique.ch>"
  "David Sheets <sheets@alum.mit.edu>"
  "Jon Ludlam <jon@recoil.org>"
  "Jules Aguillon <juloo.dsi@gmail.com>"
@@ -23,10 +23,10 @@
  "Emile Trotignon <emile.trotignon@gmail.com>")
 
 (maintainers
- "Daniel B\195\188nzli <daniel.buenzli@erratique.ch>"
+ "Daniel Bünzli <daniel.buenzli@erratique.ch>"
  "Jon Ludlam <jon@recoil.org>"
  "Jules Aguillon <juloo.dsi@gmail.com>"
- "Paul-Elliot Angl\195\168s d'Auriac <paul-elliot@tarides.com>")
+ "Paul-Elliot Anglès d'Auriac <paul-elliot@tarides.com>")
 
 (cram enable)
 
 
@@ -17,6 +17,8 @@ depends: [
   "ocaml" {>= "4.08.0" & < "5.5"}
   "astring"
   "camlp-streams"
+  "menhir" {>= "20211128"}
+  "menhirLib"
   "ppx_expect" {with-test}
   "sexplib0" {with-test}
 ]
 
@@ -0,0 +1,46 @@
+- Some locations are still not accurate. This seems to happen in comments that span
+  many lines. There is potentially an off-by-one error or similar in
+  `Lexer.update_content_newlines`, which is supposed to increment the lexbuf's line
+  position for every newline encountered in some content (i.e. inside of a code or math block).
+
+- Top-level errors like two nestable block elements or headings on the same line
+  need to be handled. Currently, they parse correctly but do not emit a warning. 
+
+- Repetition in the `tag_with_content` parse rule (parser.mly:207). Two productions are identical
+  save for a newline. This is because an optional newline causes a reduce conflict due to 
+  `nestable_block_element`'s handling of whitespace.
+
+- Improve error handling inside light table cells. Currently, we cannot do much besides use
+  Menhir's `error` token, which erases all information about the error that occurred. We
+  have to fall back on a string of the offending token to show users what went wrong, which
+  doesn't necessarily communicate a lot.
+
+- Tests. There are a few tests, like the ones which test the positions in the lexing buffer,
+  which don't apply to the new parser. Others expect error messages which cannot be produced
+  by the relevant parser rule.
+
+- There are likely some error cases which have not been handled. These should be trivial to fix:
+  you should really only need to add a new production to the relevant parser rule which
+  handles the offending token.
+
+Notes for anyone working on this:
+- Due to the nature of Menhir, this parser is difficult to work on. 
+  - Changes will have unexpected non-local consequences due to more or fewer tokens being consumed by
+    some neighboring (in the parse tree) rule.
+  - You need to familiarize yourself with the branch of the parse tree that you're working on 
+    (i.e. toplevel->nestable_block_element->paragraph) before you start making non-trivial changes.
+  - Type errors will point towards unrelated sections of the parser or give you incorrect information 
+    about what has gone wrong. 
+
+- If you need to emulate some sort of context like "paragraphs can't accept '|' tokens if they're inside 
+  tables", then you need to parameterize that rule by some other rule which dictates what it can accept. 
+  For example, toplevel block elements match `paragraph(any_symbol)` and tables match 
+  `paragraph(symbols_except_bar)`
+
+- Be as specific as possible. Avoid optional tokens when possible. Prefer the non-empty
+  list rules (`sequence_nonempty`, `sequence_separated_nonempty`) over the alternatives. 
+  Ambiguity will produce a compile-time reduce/reduce conflict if you're lucky, unexpected
+  behavior at runtime if you're not.
+
+- Contact me on the company Slack or at faycarsons23@gmail.com if you're confused about
+  anything!
@@ -1,5 +1,17 @@
 (** Abstract syntax tree representing ocamldoc comments *)
 
+(* TODO: (@faycarsons)
+   We no longer need polymorphism in the parser, so for performance and
+   simplicity's sake the AST should (probably, assuming no issues in other
+   parts of Odoc) be refactored to use nominal sum types
+*)
+
+type list_kind = [ `Ordered | `Unordered ]
+type list_syntax = [ `Light | `Heavy ]
+type list_item = [ `Li | `Dash ]
+
+type table_cell_kind = [ `Header | `Data ]
+
 (** This is a syntactic representation of ocamldoc comments. See
     {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html}The manual} for a
     detailed description of the syntax understood. Note that there is no attempt
@@ -47,8 +59,8 @@ type code_block_meta = {
   tags : code_block_tags;
 }
 
-type media = Token.media
-type media_href = Token.media_href
+type media = [ `Audio | `Video | `Image ]
+type media_href = [ `Reference of string | `Link of string ]
 
 type code_block = {
   meta : code_block_meta option;
@@ -69,9 +81,7 @@ and nestable_block_element =
         {!Odoc_parser.verbatim_content} *)
   | `Modules of string with_location list
   | `List of
-    [ `Unordered | `Ordered ]
-    * [ `Light | `Heavy ]
-    * nestable_block_element with_location list list
+    list_kind * list_syntax * nestable_block_element with_location list list
   | `Table of table
   | `Math_block of string  (** @since 2.0.0 *)
   | `Media of reference_kind * media_href with_location * string * media
 
@@ -1,5 +1,9 @@
 (ocamllex lexer)
 
+(menhir
+ (modules parser)
+ (flags --table --external-tokens Tokens --explain))
+
 (library
  (name odoc_parser)
  (public_name odoc-parser)
@@ -9,4 +13,4 @@
   (backend bisect_ppx))
  (flags
   (:standard -w -50))
- (libraries astring camlp-streams))
+ (libraries astring camlp-streams menhirLib))
@@ -3,9 +3,7 @@
 type input = {
   file : string;
   offset_to_location : int -> Loc.point;
-  warnings : Warning.t list ref;
-  lexbuf : Lexing.lexbuf;
-  string_buffer : Buffer.t;
+  mutable warnings : Warning.t list;
 }
 
-val token : input -> Lexing.lexbuf -> Token.t Loc.with_location
+val token : input -> Lexing.lexbuf -> Parser.token
Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,8 @@ depends: [`
`17`	`17`	`"ocaml" {>= "4.08.0" & < "5.5"}`
`18`	`18`	`"astring"`
`19`	`19`	`"camlp-streams"`
	`20`	`+ "menhir" {>= "20211128"}`
	`21`	`+ "menhirLib"`
`20`	`22`	`"ppx_expect" {with-test}`
`21`	`23`	`"sexplib0" {with-test}`
`22`	`24`	`]`