diff --git a/Mergekit.ipynb b/Mergekit.ipynb
index 1abb204..af4746d 100644
--- a/Mergekit.ipynb
+++ b/Mergekit.ipynb
@@ -5,7 +5,7 @@
     "colab": {
       "provenance": [],
       "machine_shape": "hm",
-      "authorship_tag": "ABX9TyMuGevIbBdnvORov5ZLmtGx",
+      "authorship_tag": "ABX9TyNkCdo3uzEUbLA4CS6VfaEM",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -1481,67 +1481,42 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "NPNPie5Eo3EZ",
-        "outputId": "450c623b-7fc8-44df-c437-ea72b44a5a75"
+        "id": "NPNPie5Eo3EZ"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
-            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
-            "  Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
-            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
-            "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m258.1/258.1 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m395.4/395.4 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.3/168.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Building wheel for mergekit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
-            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
-            "lida 0.0.10 requires fastapi, which is not installed.\n",
-            "lida 0.0.10 requires kaleido, which is not installed.\n",
-            "lida 0.0.10 requires python-multipart, which is not installed.\n",
-            "lida 0.0.10 requires uvicorn, which is not installed.\n",
-            "llmx 0.0.15a0 requires cohere, which is not installed.\n",
-            "llmx 0.0.15a0 requires openai, which is not installed.\n",
-            "llmx 0.0.15a0 requires tiktoken, which is not installed.\n",
-            "tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
-            "\u001b[0m"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "!git clone https://github.com/cg123/mergekit.git\n",
         "%cd mergekit\n",
-        "!pip install -qe ."
+        "!pip install -e ."
       ]
     },
     {
       "cell_type": "code",
       "source": [
-        "MODEL_NAME = \"NeuralPipe-9B-merged\"\n",
+        "MODEL_NAME = \"Marcoro14-7B-slerp\"\n",
         "yaml_config = \"\"\"\n",
         "slices:\n",
         "  - sources:\n",
-        "    - model: OpenPipe/mistral-ft-optimized-1218\n",
-        "      layer_range: [0, 32]\n",
-        "  - sources:\n",
-        "    - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
-        "      layer_range: [24, 32]\n",
-        "merge_method: passthrough\n",
+        "      - model: AIDC-ai-business/Marcoroni-7B-v3\n",
+        "        layer_range: [0, 32]\n",
+        "      - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1\n",
+        "        layer_range: [0, 32]\n",
+        "merge_method: slerp\n",
+        "base_model: AIDC-ai-business/Marcoroni-7B-v3\n",
+        "parameters:\n",
+        "  t:\n",
+        "    - filter: self_attn\n",
+        "      value: [0, 0.5, 0.3, 0.7, 1]\n",
+        "    - filter: mlp\n",
+        "      value: [1, 0.5, 0.7, 0.3, 0]\n",
+        "    - value: 0.5\n",
         "dtype: bfloat16\n",
         "\"\"\""
       ],
       "metadata": {
         "id": "LGd7jlfCpNcg"
       },
-      "execution_count": null,
+      "execution_count": 21,
       "outputs": []
     },
     {
@@ -1611,9 +1586,17 @@
         "# Create a Jinja template object\n",
         "jinja_template = Template(template_text.strip())\n",
         "\n",
+        "# Get list of models from config\n",
+        "if \"models\" in data:\n",
+        "    models = [data[\"models\"][i][\"model\"] for i in range(len(data[\"models\"])) if \"parameters\" in data[\"models\"][i]]\n",
+        "elif \"parameters\" in data:\n",
+        "    models = [data[\"slices\"][0][\"sources\"][i][\"model\"] for i in range(len(data[\"slices\"][0][\"sources\"]))]\n",
+        "elif \"slices\" in data:\n",
+        "    models = [data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))]\n",
+        "else:\n",
+        "    raise Exception(\"No models or slices found in yaml config\")\n",
+        "\n",
         "# Fill the template\n",
-        "models = [data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))]\n",
-        "# models = [\"OpenPipe/mistral-ft-optimized-1218\", \"mlabonne/NeuralHermes-2.5-Mistral-7B\"]\n",
         "content = jinja_template.render(\n",
         "    model_name=MODEL_NAME,\n",
         "    models=models,\n",