2026-04-25 12:21:56 +02:00

28 lines
1.0 KiB
JSON

{
"raw_pairs": 12107,
"duplicates_removed": 269,
"training_pairs": 11838,
"train_pairs": 10654,
"eval_pairs": 1184,
"sources": {
"external:vendor-deep-dives.jsonl": 11200,
"external:technical-deep-dives.jsonl": 84,
"external:rir-infrastructure-data.jsonl": 150,
"external:market-business-analysis-part1.jsonl": 10,
"external:synthesized-training-samples.jsonl": 219,
"external:nanog-ripe-labs-content.jsonl": 34,
"external:academic-research-synthesis.jsonl": 109,
"external:market-business-analysis-part6.jsonl": 5,
"external:market-business-analysis-part5.jsonl": 7,
"external:market-business-analysis-part4.jsonl": 5,
"external:market-business-analysis-part2.jsonl": 8,
"external:market-business-analysis-part3.jsonl": 7
},
"files": {
"train": "training-data/runpod/tip_llm/tip_llm-sft-train.jsonl",
"eval": "training-data/runpod/tip_llm/tip_llm-sft-eval.jsonl",
"all": "training-data/runpod/tip_llm/tip_llm-sft-all.jsonl",
"manifest": "training-data/runpod/tip_llm/manifest.json"
}
}