bbycroft · Warkanlock · Dec 3, 2023
diff --git a/src/llm/walkthrough/Walkthrough00_Intro.tsx b/src/llm/walkthrough/Walkthrough00_Intro.tsx
@@ -130,7 +130,8 @@ and sort them in alphabetical order, i.e. to "ABBBCC".`;
     breakAfter();
 
     let c5 = commentary(wt)`In the 3d view, the each green cell represents a number being processed, and each blue cell is a weight. ${embed(GreenBlueCells)}
-    Each number in the sequence first gets turned into a 48 element vector. This is called an _embedding_.`;
+    Each number in the sequence first gets turned into a 48-element vector (a size fixed by the gpt-nano architecture). This is called an _embedding_.`;
+
     breakAfter(c5);
 
     {

diff --git a/src/llm/walkthrough/Walkthrough02_Embedding.tsx b/src/llm/walkthrough/Walkthrough02_Embedding.tsx
@@ -95,6 +95,8 @@ The ${c_dimRef('_C_', DimStyle.C)} stands for ${c_dimRef('_channel_', DimStyle.C
 
 This matrix, which we'll refer to as the ${c_blockRef('_input embedding_', state.layout.residual0)} is now ready to be passed down through the model.
 This collection of ${c_dimRef('T', DimStyle.T)} columns each of length ${c_dimRef('C', DimStyle.C)} will become a familiar sight throughout this guide.
+
+For this particular model, we are using a 48-element vector for the embeddings because it's based on the initial gpt-nano configuration; the embedding size is a number that the developers decide on in advance.
 `;
 
     cleanup(t9_cleanupInstant, [t3_moveTokenEmbed, t5_movePosEmbed, t6_plusSymAnim, t7_addAnim, t8_placeAnim]);