{ "raw_pairs": 12107, "duplicates_removed": 269, "training_pairs": 11838, "train_pairs": 10654, "eval_pairs": 1184, "sources": { "external:vendor-deep-dives.jsonl": 11200, "external:technical-deep-dives.jsonl": 84, "external:rir-infrastructure-data.jsonl": 150, "external:market-business-analysis-part1.jsonl": 10, "external:synthesized-training-samples.jsonl": 219, "external:nanog-ripe-labs-content.jsonl": 34, "external:academic-research-synthesis.jsonl": 109, "external:market-business-analysis-part6.jsonl": 5, "external:market-business-analysis-part5.jsonl": 7, "external:market-business-analysis-part4.jsonl": 5, "external:market-business-analysis-part2.jsonl": 8, "external:market-business-analysis-part3.jsonl": 7 }, "files": { "train": "training-data/runpod/tip_llm/tip_llm-sft-train.jsonl", "eval": "training-data/runpod/tip_llm/tip_llm-sft-eval.jsonl", "all": "training-data/runpod/tip_llm/tip_llm-sft-all.jsonl", "manifest": "training-data/runpod/tip_llm/manifest.json" } }