mirror of
https://github.com/pese-git/llm-arch-research.git
synced 2026-01-23 21:10:54 +00:00
docs(gpt1): add architecture diagrams and notebook updates
- Added architecture diagrams for GPT-1 in draw.io format: gpt1.drawio, gpt11.drawio
- Exported visualization images for documentation and presentations: gpt1.png, gpt1.svg
- Updated the gpt.ipynb notebook to reference the new materials and possibly add explanations of layers/logic
- New assets help clarify the model structure and training flow for both contributors and external users
This commit is contained in:
145
assets/drawio/gpt1.drawio
Normal file
145
assets/drawio/gpt1.drawio
Normal file
@@ -0,0 +1,145 @@
|
||||
<mxfile host="65bd71144e">
|
||||
<diagram name="GPT Architecture" id="DEYydPS-O6mnllJWumln">
|
||||
<mxGraphModel dx="1339" dy="371" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0"/>
|
||||
<mxCell id="1" parent="0"/>
|
||||
<mxCell id="3" value="" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#fff2cc;" vertex="1" parent="1">
|
||||
<mxGeometry x="280" y="330" width="440" height="160" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="4" value="&lt;div&gt;Masked&lt;/div&gt;Multi-Head&lt;br&gt;Attention" style="rounded=0;whiteSpace=wrap;html=1;strokeColor=#6c8ebf;fillColor=#dae8fc;" vertex="1" parent="3">
|
||||
<mxGeometry x="51.42776556776556" y="50" width="78.97435897435898" height="60" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="22" value="" style="edgeStyle=none;html=1;" edge="1" parent="3" source="5">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<mxPoint x="350" y="80" as="targetPoint"/>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="5" value="Feed&lt;div&gt;Forward&lt;/div&gt;&lt;div&gt;Network&lt;/div&gt;" style="rounded=0;whiteSpace=wrap;html=1;strokeColor=#9673a6;fillColor=#e1d5e7;" vertex="1" parent="3">
|
||||
<mxGeometry x="260.9564102564102" y="50" width="71.9230769230769" height="60" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="7" value="Norm" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="3">
|
||||
<mxGeometry x="379.997619047619" y="60" width="37.87142857142857" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="21" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="3" source="12" target="5">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="12" value="Norm" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="3">
|
||||
<mxGeometry x="177.14285714285714" y="60" width="41.904761904761905" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="13" value="" style="endArrow=classic;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;edgeStyle=elbowEdgeStyle;" edge="1" parent="3" source="3" target="4">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="20" y="80.00000000000011" as="sourcePoint"/>
|
||||
<mxPoint x="229.52380952380952" y="-50" as="targetPoint"/>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="14" value="" style="endArrow=classic;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;edgeStyle=orthogonalEdgeStyle;" edge="1" parent="3" source="18" target="12">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="155.71428571427464" y="79.99999999999989" as="sourcePoint"/>
|
||||
<mxPoint x="229.52380952380952" y="-50" as="targetPoint"/>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="18" value="+" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;" vertex="1" parent="3">
|
||||
<mxGeometry x="150.00428571428571" y="75" width="10" height="10" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="19" value="" style="endArrow=classic;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;edgeStyle=orthogonalEdgeStyle;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="3" source="4" target="18">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="213.80952380952382" y="410" as="sourcePoint"/>
|
||||
<mxPoint x="145.71428571428578" y="80.00000000000011" as="targetPoint"/>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="23" value="" style="endArrow=classic;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="3" target="24">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="236.85714285714286" y="80" as="sourcePoint"/>
|
||||
<mxPoint x="349.7619047619048" y="85" as="targetPoint"/>
|
||||
<Array as="points">
|
||||
<mxPoint x="236.67904761904765" y="125"/>
|
||||
<mxPoint x="292.38095238095235" y="125"/>
|
||||
<mxPoint x="355" y="125"/>
|
||||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="28" value="" style="edgeStyle=none;html=1;" edge="1" parent="3" source="24" target="7">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="24" value="+" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;" vertex="1" parent="3">
|
||||
<mxGeometry x="350.00190476190477" y="75" width="10" height="10" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="25" value="" style="endArrow=classic;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="3" target="18">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="34.325581395348834" y="80" as="sourcePoint"/>
|
||||
<mxPoint x="150.71428571428578" y="85" as="targetPoint"/>
|
||||
<Array as="points">
|
||||
<mxPoint x="34.25858250276859" y="130"/>
|
||||
<mxPoint x="89.96048726467328" y="130"/>
|
||||
<mxPoint x="155" y="130"/>
|
||||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="36" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" edge="1" parent="1" source="32" target="3">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="32" value="+" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#f5f5f5;fontColor=#333333;strokeColor=#666666;" vertex="1" parent="1">
|
||||
<mxGeometry x="140" y="330" width="110" height="160" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="33" value="Token Emb" style="rounded=0;whiteSpace=wrap;html=1;strokeColor=#6c8ebf;fillColor=#dae8fc;" vertex="1" parent="1">
|
||||
<mxGeometry x="145" y="347.5" width="100" height="42.5" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="34" value="Position Emb" style="rounded=0;whiteSpace=wrap;html=1;strokeColor=#9673a6;fillColor=#e1d5e7;" vertex="1" parent="1">
|
||||
<mxGeometry x="145" y="430" width="100" height="42.5" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="46" style="edgeStyle=none;html=1;" edge="1" parent="1" source="37" target="40">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="37" value="Decoder" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="740" y="330" width="70" height="160" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="38" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="7" target="37">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="47" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="40" target="44">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="40" value="Decoder" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="840" y="330" width="70" height="160" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="49" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="41" target="42">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="41" value="Decoder" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="1000" y="330" width="70" height="160" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="52" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="42" target="50">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="42" value="Decoder" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="1100" y="330" width="70" height="160" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="48" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" edge="1" parent="1" source="44" target="41">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="44" value=".&lt;div&gt;.&lt;/div&gt;&lt;div&gt;.&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="940" y="370" width="30" height="80" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="53" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="50" target="51">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="50" value="Linear" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f5f5f5;fontColor=#333333;strokeColor=#666666;" vertex="1" parent="1">
|
||||
<mxGeometry x="1200" y="335" width="50" height="150" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="51" value="Softmax" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="1">
|
||||
<mxGeometry x="1286" y="335" width="50" height="150" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="54" value="Tokens" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="50" y="370" width="60" height="90" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="55" style="edgeStyle=none;html=1;entryX=-0.025;entryY=0.538;entryDx=0;entryDy=0;entryPerimeter=0;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" edge="1" parent="1" source="54">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<mxPoint x="92.75" y="414.6694174757282" as="sourcePoint"/>
|
||||
<mxPoint x="140" y="415.33000000000004" as="targetPoint"/>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
</mxfile>
|
||||
73
assets/drawio/gpt11.drawio
Normal file
73
assets/drawio/gpt11.drawio
Normal file
@@ -0,0 +1,73 @@
|
||||
<mxfile host="65bd71144e">
|
||||
<diagram name="GPT-1 Architecture" id="0">
|
||||
<mxGraphModel dx="569" dy="414" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0"/>
|
||||
<mxCell id="1" parent="0"/>
|
||||
<mxCell id="2" value="Токены (индексы)" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="40" y="140" width="110" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="3" value="Token Embeddings" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#dae8fc;" parent="1" vertex="1">
|
||||
<mxGeometry x="170" y="100" width="110" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="4" value="Position Embeddings" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#dae8fc;" parent="1" vertex="1">
|
||||
<mxGeometry x="170" y="180" width="110" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="5" value="Сумма + Dropout" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#b9e97a;" parent="1" vertex="1">
|
||||
<mxGeometry x="310" y="140" width="120" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="6" style="edgeStyle=orthogonalEdgeStyle;endArrow=block;html=1;" parent="1" source="2" target="3" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="7" style="edgeStyle=orthogonalEdgeStyle;endArrow=none;html=1;dashed=1;" parent="1" source="2" target="4" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="8" style="endArrow=block;html=1;" parent="1" source="3" target="5" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="9" style="endArrow=block;html=1;" parent="1" source="4" target="5" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="10" value="N× Decoder блок" style="rounded=1;whiteSpace=wrap;html=1;strokeColor=#cfbc96;fillColor=#fbe7b0;" parent="1" vertex="1">
|
||||
<mxGeometry x="460" y="80" width="280" height="160" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="16" value="Multi-Head Attention (h × HeadAttention, Concat, Linear, Dropout)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f8cecc;" parent="10" vertex="1">
|
||||
<mxGeometry x="10" y="10" width="120" height="45" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="17" value="Residual + LayerNorm 1" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;" parent="10" vertex="1">
|
||||
<mxGeometry x="150" y="20" width="110" height="25" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="18" value="FeedForward (Linear-ReLU-Linear-Dropout)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;" parent="10" vertex="1">
|
||||
<mxGeometry x="10" y="95" width="120" height="45" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="19" value="Residual + LayerNorm 2" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;" parent="10" vertex="1">
|
||||
<mxGeometry x="150" y="105" width="110" height="25" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="20" style="endArrow=block;html=1;" parent="10" source="16" target="17" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="21" style="endArrow=block;html=1;" parent="10" source="17" target="18" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="22" style="endArrow=block;html=1;" parent="10" source="18" target="19" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="11" style="endArrow=block;html=1;" parent="1" source="5" target="10" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="12" value="Linear (emb→vocab)" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#e1d5e7;" parent="1" vertex="1">
|
||||
<mxGeometry x="760" y="140" width="120" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="13" style="endArrow=block;html=1;" parent="1" source="10" target="12" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="14" value="Логиты [batch, seq, vocab]" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="900" y="140" width="120" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="15" style="endArrow=block;html=1;" parent="1" source="12" target="14" edge="1">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
</mxfile>
|
||||
BIN
assets/models/gpt1.png
Normal file
BIN
assets/models/gpt1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
1
assets/models/gpt1.svg
Normal file
1
assets/models/gpt1.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 30 KiB |
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user