From 15dd4e71296ce234a327764459ac1391d5f9b480 Mon Sep 17 00:00:00 2001 From: Warkanlock Date: Sun, 3 Dec 2023 18:05:19 +0100 Subject: [PATCH] chore(documentation): add context on why 48-vector element --- src/llm/walkthrough/Walkthrough00_Intro.tsx | 3 ++- src/llm/walkthrough/Walkthrough02_Embedding.tsx | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/llm/walkthrough/Walkthrough00_Intro.tsx b/src/llm/walkthrough/Walkthrough00_Intro.tsx index 398164c..088b504 100644 --- a/src/llm/walkthrough/Walkthrough00_Intro.tsx +++ b/src/llm/walkthrough/Walkthrough00_Intro.tsx @@ -130,7 +130,8 @@ and sort them in alphabetical order, i.e. to "ABBBCC".`; breakAfter(); let c5 = commentary(wt)`In the 3d view, the each green cell represents a number being processed, and each blue cell is a weight. ${embed(GreenBlueCells)} - Each number in the sequence first gets turned into a 48 element vector. This is called an _embedding_.`; + Each number in the sequence first gets turned into a 48 element vector (a size defined by the gpt-nano architecture). This is called an _embedding_.`; + breakAfter(c5); { diff --git a/src/llm/walkthrough/Walkthrough02_Embedding.tsx b/src/llm/walkthrough/Walkthrough02_Embedding.tsx index a9e2e86..5a08ada 100644 --- a/src/llm/walkthrough/Walkthrough02_Embedding.tsx +++ b/src/llm/walkthrough/Walkthrough02_Embedding.tsx @@ -95,6 +95,8 @@ The ${c_dimRef('_C_', DimStyle.C)} stands for ${c_dimRef('_channel_', DimStyle.C This matrix, which we'll refer to as the ${c_blockRef('_input embedding_', state.layout.residual0)} is now ready to be passed down through the model. This collection of ${c_dimRef('T', DimStyle.T)} columns each of length ${c_dimRef('C', DimStyle.C)} will become a familiar sight throughout this guide. + +For this particular model, we are using a 48-element vector for the embeddings because it is based on the initial gpt-nano configuration; this size is a number that the developers decide on in advance. 
`; cleanup(t9_cleanupInstant, [t3_moveTokenEmbed, t5_movePosEmbed, t6_plusSymAnim, t7_addAnim, t8_placeAnim]);