mirror of
https://github.com/pese-git/llm-arch-research.git
synced 2026-01-23 13:00:54 +00:00
- Added architecture diagrams for GPT-1: gpt1.drawio, gpt11.drawio (drawio format) - Exported visualization images: gpt1.png, gpt1.svg for documentation and presentations - Updated gpt.ipynb notebook to reference new materials and possibly add explanations of layers/logic - New assets help to clarify model structure and training flow for both contributors and external users
73 lines
5.2 KiB
Plaintext
73 lines
5.2 KiB
Plaintext
<!--
  GPT-1 architecture diagram in draw.io (mxGraph) format.
  Flow, left to right: token indices → token and position embeddings →
  sum + dropout → N× decoder block → linear projection (emb→vocab) → logits.
  Cell labels are kept verbatim (some are in Russian); the comments below give
  English glosses. Sibling order is preserved because it defines z-order.
-->
<mxfile host="65bd71144e">
  <diagram name="GPT-1 Architecture" id="0">
    <mxGraphModel dx="569" dy="414" grid="1" gridSize="10" guides="1" tooltips="1"
                  connect="1" arrows="1" fold="1" page="1" pageScale="1"
                  pageWidth="827" pageHeight="1169" math="0" shadow="0">
      <root>
        <!-- Structural cells: 0 is the model root; 1 (child of 0) is the parent
             of every top-level shape and edge in this diagram. -->
        <mxCell id="0"/>
        <mxCell id="1" parent="0"/>

        <!-- Input box; the label reads "Tokens (indices)". -->
        <mxCell id="2" value="Токены (индексы)" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="40" y="140" width="110" height="40" as="geometry"/>
        </mxCell>

        <!-- The two embedding boxes, stacked vertically to the right of the input. -->
        <mxCell id="3" value="Token Embeddings" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#dae8fc;" parent="1" vertex="1">
          <mxGeometry x="170" y="100" width="110" height="40" as="geometry"/>
        </mxCell>
        <mxCell id="4" value="Position Embeddings" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#dae8fc;" parent="1" vertex="1">
          <mxGeometry x="170" y="180" width="110" height="40" as="geometry"/>
        </mxCell>

        <!-- Merge box; the label reads "Sum + Dropout". -->
        <mxCell id="5" value="Сумма + Dropout" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#b9e97a;" parent="1" vertex="1">
          <mxGeometry x="310" y="140" width="120" height="40" as="geometry"/>
        </mxCell>

        <!-- Input → token embeddings: orthogonal edge with a block arrowhead. -->
        <mxCell id="6" style="edgeStyle=orthogonalEdgeStyle;endArrow=block;html=1;" parent="1" source="2" target="3" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
        <!-- Input → position embeddings: dashed orthogonal connector with no
             arrowhead, i.e. drawn differently from the token-embedding edge. -->
        <mxCell id="7" style="edgeStyle=orthogonalEdgeStyle;endArrow=none;html=1;dashed=1;" parent="1" source="2" target="4" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>

        <!-- Both embedding boxes feed the "Sum + Dropout" box. -->
        <mxCell id="8" style="endArrow=block;html=1;" parent="1" source="3" target="5" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
        <mxCell id="9" style="endArrow=block;html=1;" parent="1" source="4" target="5" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>

        <!-- Decoder container; the label reads "N× Decoder block". Cells 16 to 22
             below name it as their parent, so their coordinates are relative
             to this box rather than to the page. -->
        <mxCell id="10" value="N× Decoder блок" style="rounded=1;whiteSpace=wrap;html=1;strokeColor=#cfbc96;fillColor=#fbe7b0;" parent="1" vertex="1">
          <mxGeometry x="460" y="80" width="280" height="160" as="geometry"/>
        </mxCell>

        <!-- Top row of the decoder container: attention, then residual + norm. -->
        <mxCell id="16" value="Multi-Head Attention (h × HeadAttention, Concat, Linear, Dropout)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f8cecc;" parent="10" vertex="1">
          <mxGeometry x="10" y="10" width="120" height="45" as="geometry"/>
        </mxCell>
        <mxCell id="17" value="Residual + LayerNorm 1" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;" parent="10" vertex="1">
          <mxGeometry x="150" y="20" width="110" height="25" as="geometry"/>
        </mxCell>

        <!-- Bottom row of the decoder container: feed-forward, then residual + norm. -->
        <mxCell id="18" value="FeedForward (Linear-ReLU-Linear-Dropout)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;" parent="10" vertex="1">
          <mxGeometry x="10" y="95" width="120" height="45" as="geometry"/>
        </mxCell>
        <mxCell id="19" value="Residual + LayerNorm 2" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;" parent="10" vertex="1">
          <mxGeometry x="150" y="105" width="110" height="25" as="geometry"/>
        </mxCell>

        <!-- Edges inside the container, chaining 16 → 17 → 18 → 19. -->
        <mxCell id="20" style="endArrow=block;html=1;" parent="10" source="16" target="17" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
        <mxCell id="21" style="endArrow=block;html=1;" parent="10" source="17" target="18" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
        <mxCell id="22" style="endArrow=block;html=1;" parent="10" source="18" target="19" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>

        <!-- "Sum + Dropout" → decoder container. -->
        <mxCell id="11" style="endArrow=block;html=1;" parent="1" source="5" target="10" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>

        <!-- Output projection box and the edge that reaches it from the container. -->
        <mxCell id="12" value="Linear (emb→vocab)" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#e1d5e7;" parent="1" vertex="1">
          <mxGeometry x="760" y="140" width="120" height="40" as="geometry"/>
        </mxCell>
        <mxCell id="13" style="endArrow=block;html=1;" parent="1" source="10" target="12" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>

        <!-- Final box; the label reads "Logits [batch, seq, vocab]". -->
        <mxCell id="14" value="Логиты [batch, seq, vocab]" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="900" y="140" width="120" height="40" as="geometry"/>
        </mxCell>
        <mxCell id="15" style="endArrow=block;html=1;" parent="1" source="12" target="14" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>