NeurIPS 2025 E2LM Competition: Early Training Evaluation of Language Models Leaderboard

{
  • "headers": [
    • "Submission ID",
    • "Team",
    • "Submitter",
    • "Signal Quality",
    • "Ranking Consistency",
    • "Scientific Compliance",
    • "Global Score",
    • "Submission Time",
    • "Description"
    ],
  • "data": [
    • [
      • "e1490cde32",
      • "morai",
      • "giovanivaldrighi",
      • 0.915,
      • 0.885,
      • 0.638,
      • 0.801,
      • "2025-10-21T21:13:30",
      • "mmlu_v15"
      ],
    • [
      • "dc6b27cb29",
      • "morai",
      • "giovanivaldrighi",
      • 0.935,
      • 0.928,
      • 0.554,
      • 0.782,
      • "2025-10-01T22:35:50",
      • "MMLU small with ll diff (v1)"
      ],
    • [
      • "cd9b3b9524",
      • "morai",
      • "giovanivaldrighi",
      • 0.921,
      • 0.834,
      • 0.587,
      • 0.779,
      • "2025-10-22T11:57:41",
      • "mmlu_v16"
      ],
    • [
      • "8d5e3c3934",
      • "morai",
      • "giovanivaldrighi",
      • 0.902,
      • 0.899,
      • 0.575,
      • 0.771,
      • "2025-10-03T01:42:42",
      • "MMLU small with ll diff (v2)"
      ],
    • [
      • "984384485e",
      • "morai",
      • "giovanivaldrighi",
      • 0.919,
      • 0.957,
      • 0.534,
      • 0.769,
      • "2025-09-30T23:20:17",
      • "MMLU with ll diff (v1)"
      ],
    • [
      • "ed491308f5",
      • "morai",
      • "giovanivaldrighi",
      • 0.893,
      • 0.91,
      • 0.574,
      • 0.767,
      • "2025-10-06T17:51:57",
      • "MMLU with ll diff norm"
      ],
    • [
      • "e01740670c",
      • "morai",
      • "giovanivaldrighi",
      • 0.934,
      • 0.935,
      • 0.511,
      • 0.765,
      • "2025-10-21T20:57:24",
      • "mmlu_v14"
      ],
    • [
      • "6e373496a0",
      • "morai",
      • "giovanivaldrighi",
      • 0.889,
      • 0.773,
      • 0.605,
      • 0.764,
      • "2025-10-10T22:08:17",
      • "mmlu_v9"
      ],
    • [
      • "76e438a046",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.926,
      • 0.752,
      • 0.548,
      • 0.758,
      • "2025-10-13T20:53:23",
      • "0007"
      ],
    • [
      • "a4395863a4",
      • "morai",
      • "giovanivaldrighi",
      • 0.902,
      • 0.776,
      • 0.572,
      • 0.758,
      • "2025-10-17T21:35:04",
      • "mmlu_v12"
      ],
    • [
      • "2bbe5c8486",
      • "Scitix",
      • "Lie24",
      • 0.947,
      • 0.936,
      • 0.471,
      • 0.756,
      • "2025-10-20T09:30:22",
      • "1"
      ],
    • [
      • "aff36100ca",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.928,
      • 0.73,
      • 0.542,
      • 0.754,
      • "2025-09-25T21:16:33",
      • "MMLU_var new"
      ],
    • [
      • "440612e173",
      • "Nobelist",
      • "pkuHaowei",
      • 0.95,
      • 0.858,
      • 0.476,
      • 0.751,
      • "2025-10-21T08:41:30",
      • ""
      ],
    • [
      • "78710f0a1c",
      • "Nobelist",
      • "pkuHaowei",
      • 0.95,
      • 0.858,
      • 0.476,
      • 0.751,
      • "2025-09-25T16:44:33",
      • ""
      ],
    • [
      • "3e96b7b636",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.861,
      • 0.887,
      • 0.574,
      • 0.749,
      • "2025-09-28T22:07:45",
      • "0001"
      ],
    • [
      • "a3deb9d739",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.916,
      • 0.83,
      • 0.509,
      • 0.744,
      • "2025-10-11T08:56:58",
      • "0005"
      ],
    • [
      • "57a1db9bf2",
      • "Scitix",
      • "Lie24",
      • 0.954,
      • 0.972,
      • 0.423,
      • 0.743,
      • "2025-10-23T13:38:14",
      • "6"
      ],
    • [
      • "5d9e22bec2",
      • "Nobelist",
      • "pkuHaowei",
      • 0.925,
      • 0.865,
      • 0.481,
      • 0.741,
      • "2025-10-27T10:06:19",
      • "revert-42-correct"
      ],
    • [
      • "eeab1413e7",
      • "morai",
      • "giovanivaldrighi",
      • 0.849,
      • 0.748,
      • 0.582,
      • 0.732,
      • "2025-10-15T12:24:27",
      • "mmlu_v10"
      ],
    • [
      • "60d985bc28",
      • "Scitix",
      • "Lie24",
      • 0.979,
      • 0.986,
      • 0.357,
      • 0.731,
      • "2025-10-20T11:14:45",
      • "5"
      ],
    • [
      • "c1baf2ca4e",
      • "morai",
      • "giovanivaldrighi",
      • 0.959,
      • 0.837,
      • 0.419,
      • 0.731,
      • "2025-09-11T21:36:13",
      • "MMLU with fixed perplexity (v2)"
      ],
    • [
      • "95de27dbcb",
      • "morai",
      • "giovanivaldrighi",
      • 0.959,
      • 0.837,
      • 0.419,
      • 0.731,
      • "2025-09-12T21:00:14",
      • "MMLU with fixed perplexity (v3)"
      ],
    • [
      • "c7654b1abc",
      • "ShAIkespear",
      • "ESmike",
      • 0.959,
      • 0.837,
      • 0.419,
      • 0.731,
      • "2025-09-16T20:39:51",
      • "MMLU_var"
      ],
    • [
      • "18ce18709a",
      • "Nobelist",
      • "pkuHaowei",
      • 0.956,
      • 0.841,
      • 0.419,
      • 0.729,
      • "2025-08-19T06:05:53",
      • "Another MMLU-Var test with updated hf_token"
      ],
    • [
      • "fb1bc9bdad",
      • "ShAIkespear",
      • "ESmike",
      • 0.958,
      • 0.823,
      • 0.42,
      • 0.729,
      • "2025-09-17T21:02:04",
      • "MMLU_var 2.0"
      ],
    • [
      • "56ee9534e6",
      • "morai",
      • "giovanivaldrighi",
      • 0.888,
      • 0.919,
      • 0.474,
      • 0.725,
      • "2025-10-07T00:57:47",
      • "MMLU full with ll diff norm"
      ],
    • [
      • "7c4156c0b4",
      • "Nobelist",
      • "pkuHaowei",
      • 0.955,
      • 0.872,
      • 0.4,
      • 0.724,
      • "2025-09-23T16:20:43",
      • "mmlu-var new v2.0"
      ],
    • [
      • "8fe150c749",
      • "Scitix",
      • "Lie24",
      • 0.971,
      • 0.979,
      • 0.347,
      • 0.722,
      • "2025-10-20T11:14:20",
      • "4"
      ],
    • [
      • "6fafe06861",
      • "Nobelist",
      • "pkuHaowei",
      • 0.937,
      • 0.83,
      • 0.428,
      • 0.722,
      • "2025-08-27T13:51:43",
      • "Reduce dataset size + focusing on STEM"
      ],
    • [
      • "71be160599",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.964,
      • 0.8,
      • 0.401,
      • 0.722,
      • "2025-10-08T20:39:59",
      • "0004"
      ],
    • [
      • "ccbf3bf7b4",
      • "morai",
      • "giovanivaldrighi",
      • 0.947,
      • 0.886,
      • 0.394,
      • 0.72,
      • "2025-10-24T19:57:04",
      • "mmlu_v17"
      ],
    • [
      • "1bc07adbed",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.905,
      • 0.851,
      • 0.452,
      • 0.718,
      • "2025-10-08T20:33:25",
      • "0003"
      ],
    • [
      • "25fea29fad",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.924,
      • 0.928,
      • 0.403,
      • 0.716,
      • "2025-10-23T20:15:05",
      • "0013"
      ],
    • [
      • "e3c8ba2e0b",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.933,
      • 0.735,
      • 0.44,
      • 0.716,
      • "2025-10-18T20:47:17",
      • "0010"
      ],
    • [
      • "d4bb266f7e",
      • "Scitix",
      • "Lie24",
      • 0.971,
      • 0.986,
      • 0.33,
      • 0.716,
      • "2025-10-15T12:04:38",
      • "test"
      ],
    • [
      • "c31400f3ef",
      • "morai",
      • "giovanivaldrighi",
      • 0.912,
      • 0.744,
      • 0.459,
      • 0.714,
      • "2025-10-24T20:25:15",
      • "mmlu_v18"
      ],
    • [
      • "9adf239175",
      • "Nobelist",
      • "pkuHaowei",
      • 0.87,
      • 0.801,
      • 0.494,
      • 0.713,
      • "2025-09-19T17:04:06",
      • "MMLU-var new"
      ],
    • [
      • "cd86d28256",
      • "ShAIkespear",
      • "ESmike",
      • 0.956,
      • 0.737,
      • 0.396,
      • 0.71,
      • "2025-09-25T13:37:29",
      • "MMLU_var 3.0"
      ],
    • [
      • "29c501c195",
      • "Episteme",
      • "1OOl",
      • 0.938,
      • 0.842,
      • 0.365,
      • 0.699,
      • "2025-10-20T21:25:06",
      • "exp004"
      ],
    • [
      • "815cca37c4",
      • "Scitix",
      • "Lie24",
      • 0.922,
      • 0.906,
      • 0.364,
      • 0.697,
      • "2025-10-27T09:03:08",
      • "V2.1"
      ],
    • [
      • "1c3036c0bb",
      • "morai",
      • "giovanivaldrighi",
      • 0.974,
      • 0.815,
      • 0.319,
      • 0.696,
      • "2025-10-16T18:12:54",
      • "mmlu_v11"
      ],
    • [
      • "aae73d095e",
      • "DARG",
      • "imberator",
      • 0.939,
      • 0.85,
      • 0.342,
      • 0.691,
      • "2025-10-13T16:40:52",
      • "MMLU var subset"
      ],
    • [
      • "d5b347e711",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.846,
      • 0.737,
      • 0.447,
      • 0.676,
      • "2025-10-02T18:18:52",
      • "0002"
      ],
    • [
      • "79ff9c16ed",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.792,
      • 0.761,
      • 0.482,
      • 0.665,
      • "2025-10-27T19:24:41",
      • "0015"
      ],
    • [
      • "2eb78a7263",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.874,
      • 0.847,
      • 0.348,
      • 0.661,
      • "2025-10-19T09:37:08",
      • "0011"
      ],
    • [
      • "8b2821e989",
      • "ShAIkespear",
      • "ESmike",
      • 0.817,
      • 0.742,
      • 0.369,
      • 0.63,
      • "2025-10-20T22:57:25",
      • "0009"
      ],
    • [
      • "b695ee644e",
      • "morai",
      • "giovanivaldrighi",
      • 0.741,
      • 0.929,
      • 0.376,
      • 0.614,
      • "2025-09-26T14:06:20",
      • "MMLU Var with constrained prompt"
      ],
    • [
      • "6ee7646bce",
      • "morai",
      • "giovanivaldrighi",
      • 0.845,
      • 0.843,
      • 0.264,
      • 0.612,
      • "2025-10-09T17:39:51",
      • "mmlu_v8"
      ],
    • [
      • "4616bc30c4",
      • "Nobelist",
      • "pkuHaowei",
      • 0.63,
      • 0.63,
      • 0.534,
      • 0.592,
      • "2025-10-23T18:34:54",
      • ""
      ],
    • [
      • "4d37e4aeeb",
      • "Cruise",
      • "wmere",
      • 0.614,
      • 0.704,
      • 0.454,
      • 0.559,
      • "2025-09-29T08:06:59",
      • ""
      ],
    • [
      • "376e97c0a0",
      • "Nobelist",
      • "pkuHaowei",
      • 0.592,
      • 0.32,
      • 0.511,
      • 0.533,
      • "2025-10-23T18:57:01",
      • "strategy balance"
      ],
    • [
      • "858c964f59",
      • "Nobelist",
      • "pkuHaowei",
      • 0.592,
      • 0.32,
      • 0.511,
      • 0.533,
      • "2025-10-27T10:03:55",
      • "revert-birth"
      ],
    • [
      • "9f9b33f7c8",
      • "Episteme",
      • "1OOl",
      • 0.567,
      • 0.781,
      • 0.342,
      • 0.498,
      • "2025-10-20T22:20:17",
      • "exp008"
      ],
    • [
      • "63bf359ae7",
      • "Episteme",
      • "1OOl",
      • 0.528,
      • 0.76,
      • 0.048,
      • 0.359,
      • "2025-10-27T00:01:21",
      • "exp005_2"
      ],
    • [
      • "60c6942c7b",
      • "Scitix",
      • "Lie24",
      • 0.471,
      • 0.471,
      • 0.165,
      • 0.349,
      • "2025-10-20T11:13:55",
      • "3"
      ],
    • [
      • "b74f0486fd",
      • "Scitix",
      • "Lie24",
      • 0.471,
      • 0.471,
      • 0.165,
      • 0.349,
      • "2025-10-20T11:13:25",
      • "2"
      ],
    • [
      • "acce8d8af8",
      • "ShAIkespear",
      • "ESmike",
      • 0.399,
      • 0.715,
      • 0.194,
      • 0.348,
      • "2025-10-16T17:24:28",
      • "0006"
      ],
    • [
      • "bff4cf1ff9",
      • "Scitix",
      • "Lie24",
      • 0.29,
      • 0.622,
      • 0.271,
      • 0.316,
      • "2025-10-16T12:08:59",
      • "3"
      ],
    • [
      • "5845c5c8be",
      • "morai",
      • "giovanivaldrighi",
      • 0.265,
      • 0.722,
      • 0.236,
      • 0.299,
      • "2025-08-22T11:53:41",
      • "Task focused in STEM topics for MMLU using prompt engineering."
      ],
    • [
      • "319b1a3a3a",
      • "thu-let_it_be",
      • "jiangyi233",
      • 0.182,
      • 0.555,
      • 0.286,
      • 0.261,
      • "2025-10-24T03:26:30",
      • "100(2)"
      ],
    • [
      • "c3017d1ddf",
      • "thu-let_it_be",
      • "jiangyi233",
      • 0.182,
      • 0.555,
      • 0.286,
      • 0.261,
      • "2025-10-24T03:26:23",
      • "100(2)"
      ],
    • [
      • "3c4c5eac04",
      • "thu-let_it_be",
      • "jiangyi233",
      • 0.173,
      • 0.546,
      • 0.262,
      • 0.246,
      • "2025-10-26T15:53:34",
      • "1026-1400"
      ],
    • [
      • "6ea21350e8",
      • "Episteme",
      • "1OOl",
      • 0.305,
      • 0.667,
      • 0.057,
      • 0.242,
      • "2025-10-19T20:15:04",
      • "exp002"
      ],
    • [
      • "11619a9444",
      • "ShAIkespear",
      • "DaGrapix",
      • 0.386,
      • 0.477,
      • 0,
      • 0.241,
      • "2025-09-19T15:12:13",
      • "MMLU_var new"
      ],
    • [
      • "c619560da1",
      • "Episteme",
      • "1OOl",
      • 0.321,
      • 0.552,
      • 0,
      • 0.215,
      • "2025-10-20T21:55:19",
      • "exp006"
      ],
    • [
      • "22fb4b9aa6",
      • "ShAIkespear",
      • "ESmike",
      • 0.162,
      • 0.488,
      • 0.071,
      • 0.158,
      • "2025-09-10T19:25:19",
      • "Second Test"
      ]
    ],
  • "metadata": null
}