diff --git a/.gitignore b/.gitignore
index 4385a0b..387eacb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,8 @@
 ch05/07_gpt_to_llama/Llama-3.1-8B
 ch05/07_gpt_to_llama/Llama-3.1-8B-Instruct
 ch05/07_gpt_to_llama/Llama-3.2-1B
 ch05/07_gpt_to_llama/Llama-3.2-1B-Instruct
+ch05/07_gpt_to_llama/Llama-3.2-3B
+ch05/07_gpt_to_llama/Llama-3.2-3B-Instruct
 ch06/01_main-chapter-code/gpt2
 ch06/02_bonus_additional-experiments/gpt2
diff --git a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb
index db301f5..e75ff4a 100644
--- a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb
+++ b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb
@@ -409,7 +409,7 @@
     "self.pos_emb = nn.Embedding(cfg[\"context_length\"], cfg[\"emb_dim\"])\n",
     "```\n",
     "\n",
-    "- Instead of these absolute positional embeddings, Llama uses relative positional embeddings, called rotary position embeddings (RoPE for short)\n",
+    "- Unlike traditional absolute positional embeddings, Llama uses rotary position embeddings (RoPE), which enable it to capture both absolute and relative positional information simultaneously\n",
     "- The reference paper for RoPE is [RoFormer: Enhanced Transformer with Rotary Position Embedding (2021)](https://arxiv.org/abs/2104.09864)"
    ]
  },