Skip to content

Commit

Permalink
Created using Colab
Browse files Browse the repository at this point in the history
  • Loading branch information
ggbetz committed Apr 16, 2024
1 parent cda1376 commit f6d526a
Showing 1 changed file with 230 additions and 0 deletions.
230 changes: 230 additions & 0 deletions notebooks/CoT_Leaderboard_Reasoning_Traces_Length.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyNUgf6Q2DjFMgz2E0KQ20rT",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/logikon-ai/cot-eval/blob/main/notebooks/CoT_Leaderboard_Reasoning_Traces_Length.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Explore the length of reasoning traces generated by different models"
],
"metadata": {
"id": "oQr5G_VwR0ld"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ai1tt8A0Riff",
"outputId": "49455e9e-d7ca-4044-ca7b-a9afed3ac7d8"
},
"outputs": [],
"source": [
"# Upgrade only the Hugging Face libraries. pandas is intentionally left out of the\n",
"# upgrade list: the google-colab package pins its own pandas version, and\n",
"# force-upgrading pandas makes pip report a dependency conflict with that pin.\n",
"%pip install -Uq datasets huggingface_hub"
]
},
{
"cell_type": "code",
"source": [
"import getpass\n",
"from huggingface_hub import HfApi, login\n",
"\n",
"# Ask for the token interactively instead of hard-coding it in the notebook.\n",
"HF_TOKEN = getpass.getpass(\"HF Token:\")\n",
"\n",
"# Log this session in (also registering the token as a git credential),\n",
"# then build an API client that uses the same token.\n",
"login(token=HF_TOKEN, add_to_git_credential=True)\n",
"API = HfApi(token=HF_TOKEN)"
],
"metadata": {
"id": "JorJrHE6R7UJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Load traces"
],
"metadata": {
"id": "VrWiLRMYRySy"
}
},
{
"cell_type": "code",
"source": [
"# Dataset identifier that is handed to `datasets.load_dataset` when the traces are loaded.\n",
"TRACES_DATASET = \"cot-leaderboard/cot-eval-traces-2.0\""
],
"metadata": {
"id": "GsV_-du4Rs-D"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import datasets\n",
"import pandas as pd\n",
"\n",
"# Materialise the \"test\" split of the traces dataset as a DataFrame.\n",
"df_cottraces = pd.DataFrame(datasets.load_dataset(TRACES_DATASET, split=\"test\"))\n",
"\n",
"# Convert every row's `config_data` entry to a dict and append the resulting keys\n",
"# as extra columns next to the original ones (rows are aligned on the index).\n",
"# NOTE(review): dict() implies each entry is a mapping or a sequence of key/value\n",
"# pairs -- confirm against the dataset schema.\n",
"df_cottraces = pd.concat(\n",
"    [\n",
"        df_cottraces,\n",
"        pd.DataFrame([dict(cfg) for cfg in df_cottraces[\"config_data\"]]),\n",
"    ],\n",
"    axis=1,\n",
")\n"
],
"metadata": {
"id": "ahGOIg8lR-pA"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Add the length of each reasoning trace (in characters) as a column"
],
"metadata": {
"id": "Py6820PoSOW6"
}
},
{
"cell_type": "code",
"source": [
"# Traces with fewer than this many characters are treated as virtually empty.\n",
"MIN_TRACE_CHARS = 3\n",
"\n",
"# Character count per trace; `.str.len()` yields NaN where the trace is missing.\n",
"df_cottraces[\"r_length\"] = df_cottraces.reasoning_trace.str.len()\n",
"# Count missing traces (NaN length) as length 0 so they are flagged as empty;\n",
"# a bare `NaN < threshold` comparison would silently evaluate to False.\n",
"df_cottraces[\"r_empty\"] = df_cottraces.r_length.fillna(0) < MIN_TRACE_CHARS\n",
"# Distribution of trace lengths, ordered by length.\n",
"df_cottraces.r_length.value_counts().sort_index()"
],
"metadata": {
"id": "4_MFxZWvR-lq"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Share of (virtually) empty traces for each model: the mean of the boolean\n",
"# `r_empty` flag over that model's rows.\n",
"df_cottraces.groupby(\"model\")[[\"r_empty\"]].agg(\"mean\")"
],
"metadata": {
"id": "RN5weiC1R-gF"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"|model|r\\_empty|\n",
"|---|---|\n",
"|01-ai/Yi-34B|0\\.6977445172019541|\n",
"|01-ai/Yi-6B|0\\.9766656272736721|\n",
"|Deci/DeciLM-7B|0\\.0|\n",
"|Deci/DeciLM-7B-instruct|0\\.18127013823926827|\n",
"|HuggingFaceH4/zephyr-7b-beta|0\\.0|\n",
"|Intel/neural-chat-7b-v3-1|0\\.0|\n",
"|Locutusque/Hercules-4\\.0-Mistral-v0\\.2-7B|0\\.2919654921525829|\n",
"|Locutusque/OpenCerebrum-1\\.0-7b-DPO|0\\.0|\n",
"|NousResearch/Hermes-2-Pro-Mistral-7B|0\\.00363787548071926|\n",
"|NousResearch/Nous-Hermes-llama-2-7b|0\\.008938779752624467|\n",
"|OpenBuddy/openbuddy-mistral2-7b-v20\\.2-32k|0\\.21130859578006445|\n",
"|Qwen/Qwen1\\.5-14B|0\\.6211932231576759|\n",
"|allenai/OLMo-1B|0\\.00015590894917368256|\n",
"|allenai/tulu-2-13b|0\\.094792641097599|\n",
"|allenai/tulu-2-70b|0\\.07140629872154662|\n",
"|allenai/tulu-2-7b|0\\.09775491113189898|\n",
"|allenai/tulu-2-dpo-13b|0\\.10087309011537263|\n",
"|allenai/tulu-2-dpo-70b|0\\.010030142396840245|\n",
"|allenai/tulu-2-dpo-7b|0\\.024425735370543603|\n",
"|databricks/dolly-v2-3b|0\\.0|\n",
"|google/gemma-2b|0\\.17035651179711048|\n",
"|google/gemma-2b-it|0\\.00020787859889824343|\n",
"|google/gemma-7b|0\\.021047708138447148|\n",
"|google/gemma-7b-it|0\\.0|\n",
"|ichigoberry/pandafish-2-7b-32k|0\\.0|\n",
"|meta-llama/Llama-2-13b-chat-hf|0\\.007223781311713959|\n",
"|meta-llama/Llama-2-13b-hf|0\\.04006859993763642|\n",
"|meta-llama/Llama-2-70b-chat-hf|0\\.0010393929944912171|\n",
"|meta-llama/Llama-2-70b-hf|0\\.9658559401309635|\n",
"|meta-llama/Llama-2-7b-hf|0\\.948757925371583|\n",
"|microsoft/Orca-2-13b|0\\.00015590894917368256|\n",
"|microsoft/Orca-2-7b|0\\.16240515538925268|\n",
"|microsoft/phi-2|0\\.0|\n",
"|mistralai/Mistral-7B-Instruct-v0\\.2|0\\.0|\n",
"|mistralai/Mistral-7B-v0\\.1|0\\.41601222790280923|\n",
"|mistralai/Mixtral-8x7B-Instruct-v0\\.1|0\\.0|\n",
"|mistralai/Mixtral-8x7B-v0\\.1|0\\.5075355992100613|\n",
"|openbmb/Eurus-70b-sft|0\\.261303398815092|\n",
"|openbmb/Eurus-7b-kto|0\\.0|\n",
"|openchat/openchat-3\\.5-0106|0\\.0|\n",
"|teknium/OpenHermes-2\\.5-Mistral-7B|0\\.12722170252572498|\n",
"|upstage/SOLAR-10\\.7B-Instruct-v1\\.0|0\\.0|\n",
"|upstage/SOLAR-10\\.7B-v1\\.0|0\\.6115268683089076|"
],
"metadata": {
"id": "xWW1zB_LS0KK"
}
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "KC_LnebpSxYz"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "viKtXcRsR-Ya"
},
"execution_count": null,
"outputs": []
}
]
}

0 comments on commit f6d526a

Please sign in to comment.