NeurIPS 2025 E2LM Competition: Early Training Evaluation of Language Models Leaderboard

{
  • "headers": [
    • "Submission ID",
    • "Team",
    • "Submitter",
    • "Signal Quality",
    • "Ranking Consistency",
    • "Scientific Compliance",
    • "Global Score",
    • "Submission Time",
    • "Description"
    ],
  • "data": [
    • [
      • "dc6b27cb29",
      • "morai",
      • "giovanivaldrighi",
      • 0.935,
      • 0.928,
      • 0.554,
      • 0.782,
      • "2025-10-01T22:35:50",
      • "MMLU small with ll diff (v1)"
      ],
    • [
      • "8d5e3c3934",
      • "morai",
      • "giovanivaldrighi",
      • 0.902,
      • 0.899,
      • 0.575,
      • 0.771,
      • "2025-10-03T01:42:42",
      • "MMLU small with ll diff (v2)"
      ],
    • [
      • "984384485e",
      • "morai",
      • "giovanivaldrighi",
      • 0.919,
      • 0.957,
      • 0.534,
      • 0.769,
      • "2025-09-30T23:20:17",
      • "MMLU with ll diff (v1)"
      ],
    • [
      • "ed491308f5",
      • "morai",
      • "giovanivaldrighi",
      • 0.893,
      • 0.91,
      • 0.574,
      • 0.767,
      • "2025-10-06T17:51:57",
      • "MMLU with ll diff norm"
      ],
    • [
      • "aff36100ca",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.928,
      • 0.73,
      • 0.542,
      • 0.754,
      • "2025-09-25T21:16:33",
      • "MMLU_var new"
      ],
    • [
      • "78710f0a1c",
      • "Nobelist",
      • "pkuHaowei",
      • 0.95,
      • 0.858,
      • 0.476,
      • 0.751,
      • "2025-09-25T16:44:33",
      • ""
      ],
    • [
      • "3e96b7b636",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.861,
      • 0.887,
      • 0.574,
      • 0.749,
      • "2025-09-28T22:07:45",
      • "0001"
      ],
    • [
      • "95de27dbcb",
      • "morai",
      • "giovanivaldrighi",
      • 0.959,
      • 0.837,
      • 0.419,
      • 0.731,
      • "2025-09-12T21:00:14",
      • "MMLU with fixed perplexity (v3)"
      ],
    • [
      • "c1baf2ca4e",
      • "morai",
      • "giovanivaldrighi",
      • 0.959,
      • 0.837,
      • 0.419,
      • 0.731,
      • "2025-09-11T21:36:13",
      • "MMLU with fixed perplexity (v2)"
      ],
    • [
      • "c7654b1abc",
      • "ShAIkespear",
      • "ESmike",
      • 0.959,
      • 0.837,
      • 0.419,
      • 0.731,
      • "2025-09-16T20:39:51",
      • "MMLU_var"
      ],
    • [
      • "fb1bc9bdad",
      • "ShAIkespear",
      • "ESmike",
      • 0.958,
      • 0.823,
      • 0.42,
      • 0.729,
      • "2025-09-17T21:02:04",
      • "MMLU_var 2.0"
      ],
    • [
      • "18ce18709a",
      • "Nobelist",
      • "pkuHaowei",
      • 0.956,
      • 0.841,
      • 0.419,
      • 0.729,
      • "2025-08-19T06:05:53",
      • "Another MMLU-Var test with updated hf_token"
      ],
    • [
      • "56ee9534e6",
      • "morai",
      • "giovanivaldrighi",
      • 0.888,
      • 0.919,
      • 0.474,
      • 0.725,
      • "2025-10-07T00:57:47",
      • "MMLU full with ll diff norm"
      ],
    • [
      • "7c4156c0b4",
      • "Nobelist",
      • "pkuHaowei",
      • 0.955,
      • 0.872,
      • 0.4,
      • 0.724,
      • "2025-09-23T16:20:43",
      • "mmlu-var new v2.0"
      ],
    • [
      • "6fafe06861",
      • "Nobelist",
      • "pkuHaowei",
      • 0.937,
      • 0.83,
      • 0.428,
      • 0.722,
      • "2025-08-27T13:51:43",
      • "Reduce dataset size + focusing on STEM"
      ],
    • [
      • "9adf239175",
      • "Nobelist",
      • "pkuHaowei",
      • 0.87,
      • 0.801,
      • 0.494,
      • 0.713,
      • "2025-09-19T17:04:06",
      • "MMLU-var new"
      ],
    • [
      • "cd86d28256",
      • "ShAIkespear",
      • "ESmike",
      • 0.956,
      • 0.737,
      • 0.396,
      • 0.71,
      • "2025-09-25T13:37:29",
      • "MMLU_var 3.0"
      ],
    • [
      • "d5b347e711",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.846,
      • 0.737,
      • 0.447,
      • 0.676,
      • "2025-10-02T18:18:52",
      • "0002"
      ],
    • [
      • "b695ee644e",
      • "morai",
      • "giovanivaldrighi",
      • 0.741,
      • 0.929,
      • 0.376,
      • 0.614,
      • "2025-09-26T14:06:20",
      • "MMLU Var with constrained prompt"
      ],
    • [
      • "4d37e4aeeb",
      • "Cruise",
      • "wmere",
      • 0.614,
      • 0.704,
      • 0.454,
      • 0.559,
      • "2025-09-29T08:06:59",
      • ""
      ],
    • [
      • "5845c5c8be",
      • "morai",
      • "giovanivaldrighi",
      • 0.265,
      • 0.722,
      • 0.236,
      • 0.299,
      • "2025-08-22T11:53:41",
      • "Task focused in STEM topics for MMLU using prompt engineering."
      ],
    • [
      • "22fb4b9aa6",
      • "ShAIkespear",
      • "ESmike",
      • 0.162,
      • 0.488,
      • 0.071,
      • 0.158,
      • "2025-09-10T19:25:19",
      • "Second Test"
      ],
    • [
      • "11619a9444",
      • "ShAIkespear",
      • "DaGrapix",
      • null,
      • 1,
      • 0,
      • null,
      • "2025-09-19T15:12:13",
      • "MMLU_var new"
      ]
    ],
  • "metadata": null
}