Mirror of https://github.com/pese-git/llm-arch-research.git, synced 2026-01-23 21:10:54 +00:00
refactor(experiments): migrate to universal runner + config structure, remove legacy scripts
- add universal runner run_llm_experiment.py with JSON-config-driven LLM training / generation
- add configs for gpt, gpt2, and llama (training/generation)
- remove the individual train/generate scripts for each model
- update README with a simple how-to for the experiments block

BREAKING CHANGE: all llm_only experiments now run only through run_llm_experiment.py; legacy scripts removed
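For orientation, here is a minimal sketch of what a JSON-config-driven runner can look like, assuming it dispatches on whether a config defines a "training" or a "generation" section (as the configs below do). run_llm_experiment.py itself is not part of this diff, so the helper names are illustrative, not the repository's actual code:

# Minimal sketch of a JSON-config-driven runner (assumption: the real
# run_llm_experiment.py is not shown in this diff; run_training and
# run_generation are illustrative stubs, not the repo's functions).
import argparse
import json

def run_training(cfg: dict) -> None:
    # A real runner would build the tokenizer/model from cfg["model_config"]
    # and train with the cfg["training"] hyperparameters.
    print("train:", cfg["model_weights"], cfg["training"])

def run_generation(cfg: dict) -> None:
    # A real runner would load cfg["model_weights"] and sample with the
    # cfg["generation"] settings for each prompt in cfg["test_prompts"].
    print("generate:", cfg["model_weights"], cfg["generation"])

def main() -> None:
    parser = argparse.ArgumentParser(description="Run an LLM experiment from a JSON config")
    parser.add_argument("--config", required=True, help="path to a JSON config file")
    args = parser.parse_args()
    with open(args.config, encoding="utf-8") as f:
        cfg = json.load(f)
    # The *_train.json configs carry a "training" block; the *_generate.json
    # configs carry a "generation" block. Dispatch on whichever is present.
    if "training" in cfg:
        run_training(cfg)
    elif "generation" in cfg:
        run_generation(cfg)
    else:
        raise SystemExit("config must define a 'training' or 'generation' section")

if __name__ == "__main__":
    main()

With the configs added below, invocation would then be along the lines of: python run_llm_experiment.py --config experiments/llm_only/configs/gpt2_train.json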
experiments/llm_only/configs/gpt2_generate.json (new file, 19 lines)
@@ -0,0 +1,19 @@
{
  "bpe_tokenizer": "checkpoints/bpe_tokenizer.json",
  "test_prompts": [
    "Нейронные сети",
    "Обработка естественного языка",
    "GPT-2 — это"
  ],
  "model_config_path": "checkpoints/gpt2-bpe/config.json",
  "model_weights": "checkpoints/gpt2-bpe/model.pt",
  "generation": {
    "max_new_tokens": 40,
    "temperature": 0.8,
    "do_sample": true,
    "top_k": null,
    "top_p": null
  },
  "log_path": "checkpoints/llm_only_generation_logs.json"
}
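In the generation block above, top_k and top_p are null, so sampling reduces to plain temperature sampling. As a generic illustration of what these knobs conventionally mean (not the decoding loop from this repository):

# Conventional semantics of the "generation" knobs above (temperature,
# do_sample, top_k, top_p); a generic sketch, not this repo's decoder.
import math
import random

def sample_next_token(logits, temperature=0.8, do_sample=True, top_k=None, top_p=None):
    if not do_sample:
        return max(range(len(logits)), key=lambda i: logits[i])  # greedy argmax
    scaled = [l / temperature for l in logits]  # <1 sharpens, >1 flattens
    m = max(scaled)
    probs = [math.exp(l - m) for l in scaled]   # stable softmax
    total = sum(probs)
    probs = [p / total for p in probs]
    candidates = sorted(range(len(probs)), key=lambda i: -probs[i])
    if top_k is not None:
        candidates = candidates[:top_k]          # keep the k most likely tokens
    if top_p is not None:
        kept, mass = [], 0.0
        for i in candidates:                     # smallest prefix reaching mass top_p
            kept.append(i)
            mass += probs[i]
            if mass >= top_p:
                break
        candidates = kept
    weights = [probs[i] for i in candidates]
    return random.choices(candidates, weights=weights, k=1)[0]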
experiments/llm_only/configs/gpt2_train.json (new file, 23 lines)
@@ -0,0 +1,23 @@
{
  "bpe_tokenizer": "checkpoints/bpe_tokenizer.json",
  "bpe_vocab_size": 1000,
  "bpe_special_tokens": ["<pad>", "<unk>", "<bos>", "<eos>"],
  "test_prompts": ["Искусственный интеллект", "Python — это"],
  "model_config": {
    "vocab_size": null,
    "embed_dim": 256,
    "num_heads": 4,
    "num_layers": 4,
    "max_position_embeddings": 128,
    "dropout": 0.1
  },
  "model_weights": "checkpoints/gpt2-bpe/model.pt",
  "model_config_path": "checkpoints/gpt2-bpe/config.json",
  "training": {
    "learning_rate": 0.0003,
    "batch_size": 2,
    "num_epochs": 3,
    "warmup_steps": 50
  },
  "log_path": "checkpoints/gpt2_only_training_logs.json"
}
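In the train configs, vocab_size is null, presumably filled in at runtime from the trained BPE tokenizer's vocabulary, and warmup_steps pairs with learning_rate. A common interpretation, assumed here since the runner's schedule is not shown in this diff, is a linear warmup to the base rate:

# Linear LR warmup as "warmup_steps" is commonly interpreted (an assumption;
# the actual schedule lives in run_llm_experiment.py, not shown here).
def warmup_lr(step: int, base_lr: float = 3e-4, warmup_steps: int = 50) -> float:
    # Ramp linearly from 0 to base_lr over the first warmup_steps updates,
    # then hold the base rate constant.
    if step < warmup_steps:
        return base_lr * (step + 1) / warmup_steps
    return base_lr

# e.g. warmup_lr(0) == 6e-06, warmup_lr(49) == 3e-04, warmup_lr(200) == 3e-04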
experiments/llm_only/configs/gpt_generate.json (new file, 19 lines)
@@ -0,0 +1,19 @@
{
  "bpe_tokenizer": "checkpoints/bpe_tokenizer.json",
  "test_prompts": [
    "The neural network",
    "Transformer architecture",
    "GPT models are"
  ],
  "model_config_path": "checkpoints/gpt-bpe/config.json",
  "model_weights": "checkpoints/gpt-bpe/model.pt",
  "generation": {
    "max_new_tokens": 40,
    "temperature": 0.8,
    "do_sample": true,
    "top_k": null,
    "top_p": null
  },
  "log_path": "checkpoints/llm_only_generation_logs.json"
}
experiments/llm_only/configs/gpt_train.json (new file, 23 lines)
@@ -0,0 +1,23 @@
{
  "bpe_tokenizer": "checkpoints/bpe_tokenizer.json",
  "bpe_vocab_size": 1000,
  "bpe_special_tokens": ["<pad>", "<unk>", "<bos>", "<eos>"],
  "test_prompts": ["GPT language model", "Machine learning basics"],
  "model_config": {
    "vocab_size": null,
    "embed_dim": 256,
    "num_heads": 4,
    "num_layers": 4,
    "max_position_embeddings": 128,
    "dropout": 0.1
  },
  "model_weights": "checkpoints/gpt-bpe/model.pt",
  "model_config_path": "checkpoints/gpt-bpe/config.json",
  "training": {
    "learning_rate": 0.0003,
    "batch_size": 2,
    "num_epochs": 3,
    "warmup_steps": 50
  },
  "log_path": "checkpoints/gpt_only_training_logs.json"
}
experiments/llm_only/configs/llama_generate.json (new file, 19 lines)
@@ -0,0 +1,19 @@
{
  "bpe_tokenizer": "checkpoints/bpe_tokenizer.json",
  "test_prompts": [
    "Open weights",
    "The Llama model is",
    "Efficient transformers"
  ],
  "model_config_path": "checkpoints/llama-bpe/config.json",
  "model_weights": "checkpoints/llama-bpe/model.pt",
  "generation": {
    "max_new_tokens": 40,
    "temperature": 0.8,
    "do_sample": true,
    "top_k": null,
    "top_p": null
  },
  "log_path": "checkpoints/llm_only_generation_logs.json"
}
experiments/llm_only/configs/llama_train.json (new file, 23 lines)
@@ -0,0 +1,23 @@
{
  "bpe_tokenizer": "checkpoints/bpe_tokenizer.json",
  "bpe_vocab_size": 1000,
  "bpe_special_tokens": ["<pad>", "<unk>", "<bos>", "<eos>"],
  "test_prompts": ["Open source AI", "What is Llama?"],
  "model_config": {
    "vocab_size": null,
    "embed_dim": 256,
    "num_heads": 4,
    "num_layers": 4,
    "max_position_embeddings": 128,
    "dropout": 0.1
  },
  "model_weights": "checkpoints/llama-bpe/model.pt",
  "model_config_path": "checkpoints/llama-bpe/config.json",
  "training": {
    "learning_rate": 0.0003,
    "batch_size": 2,
    "num_epochs": 3,
    "warmup_steps": 50
  },
  "log_path": "checkpoints/llama_only_training_logs.json"
}