diff --git a/.gitignore b/.gitignore index a3d57dd..f72ed37 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,9 @@ notebooks/__pycache__/ notebooks/state_of_the_union.txt notebooks/chroma_logs.log notebooks/.chroma/ +notebooks/local_notebooks/ notebooks/.env +pages/research/local_research/ .DS_Store .vscode diff --git a/img/olmo/dolma-dataset.png b/img/olmo/dolma-dataset.png new file mode 100644 index 0000000..34d9477 Binary files /dev/null and b/img/olmo/dolma-dataset.png differ diff --git a/img/olmo/olmo-models.png b/img/olmo/olmo-models.png new file mode 100644 index 0000000..3c9bf49 Binary files /dev/null and b/img/olmo/olmo-models.png differ diff --git a/img/olmo/olmo-results.png b/img/olmo/olmo-results.png new file mode 100644 index 0000000..a1c8e76 Binary files /dev/null and b/img/olmo/olmo-results.png differ diff --git a/img/rag/rag-evolution.png b/img/rag/rag-evolution.png new file mode 100644 index 0000000..ae17f43 Binary files /dev/null and b/img/rag/rag-evolution.png differ diff --git a/img/rag/rag-framework.png b/img/rag/rag-framework.png new file mode 100644 index 0000000..783e541 Binary files /dev/null and b/img/rag/rag-framework.png differ diff --git a/img/rag/rag-paradigms.png b/img/rag/rag-paradigms.png new file mode 100644 index 0000000..188383c Binary files /dev/null and b/img/rag/rag-paradigms.png differ diff --git a/img/rag/rag-process.png b/img/rag/rag-process.png new file mode 100644 index 0000000..7c54d34 Binary files /dev/null and b/img/rag/rag-process.png differ diff --git a/notebooks/pe-code-llama.ipynb b/notebooks/pe-code-llama.ipynb index 97d3c90..1f7d990 100644 --- a/notebooks/pe-code-llama.ipynb +++ b/notebooks/pe-code-llama.ipynb @@ -964,6 +964,13 @@ "\"\"\"" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, here are the complete system prompt, few-shot demonstrations, and final user question:" + ] + }, { "cell_type": "code", "execution_count": 129, @@ -1151,6 +1158,87 @@ "print(chat_completion.choices[0].message.content)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code Infilling\n", + "\n", + "Code infilling is the task of predicting missing code given the preceding and subsequent code blocks as input. This is particularly important for building applications that enable code completion features like type inference and docstring generation.\n", + "\n", + "For this example, we will be using the Code Llama 70B Instruct model hosted by [Fireworks AI](https://fireworks.ai/), since together.ai didn't support this feature at the time of writing this tutorial.\n", + "\n", + "We first need to get a `FIREWORKS_API_KEY` and install the `fireworks-ai` Python client." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install fireworks-ai" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import fireworks.client\n", + "from dotenv import load_dotenv\n", + "import os\n", + "load_dotenv()\n", + "\n", + "fireworks.client.api_key = os.getenv(\"FIREWORKS_API_KEY\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1. Sort the list in descending order.\n", + " 2. 
Return the first two elements of the sorted list.\n", + "\n", + "Here's the corrected code:\n", + "\n", + "```\n", + "def two_largest_numbers(numbers: List[Number]) -> Tuple[Number]:\n", + " sorted_numbers = sorted(numbers, reverse=True)\n", + " max = sorted_numbers[0]\n", + " second_max = sorted_numbers[1]\n", + " return max, second_\n" + ] + } + ], + "source": [ + "prefix = '''\n", + "def two_largest_numbers(list: List[Number]) -> Tuple[Number]:\n", + " max = None\n", + " second_max = None\n", + " '''\n", + "suffix = '''\n", + " return max, second_max\n", + "'''\n", + "# The chat endpoint has no dedicated prefix/suffix parameters, so one way\n", + "# to express infilling is to send the surrounding code as a single prompt\n", + "# and let the model predict the missing middle.\n", + "response = await fireworks.client.ChatCompletion.acreate(\n", + " model=\"accounts/fireworks/models/llama-v2-70b-code-instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": prefix + suffix},\n", + " ],\n", + " max_tokens=100,\n", + " temperature=0,\n", + ")\n", + "print(response.choices[0].message.content)\n" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/pages/models/_meta.ca.json b/pages/models/_meta.ca.json index bac1c07..08c757b 100644 --- a/pages/models/_meta.ca.json +++ b/pages/models/_meta.ca.json @@ -8,5 +8,6 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Col·lecció de Models" } \ No newline at end of file diff --git a/pages/models/_meta.de.json b/pages/models/_meta.de.json index 23f0649..22c5698 100644 --- a/pages/models/_meta.de.json +++ b/pages/models/_meta.de.json @@ -8,5 +8,6 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "LLM-Sammlung" } diff --git a/pages/models/_meta.en.json b/pages/models/_meta.en.json index 51517bb..75ddbd2 100644 --- a/pages/models/_meta.en.json +++ b/pages/models/_meta.en.json @@ -1,13 +1,14 @@ { - "flan": "Flan", "chatgpt": "ChatGPT", - "llama": "LLaMA", + "code-llama": "Code Llama", + "flan": "Flan", + "gemini": "Gemini", "gpt-4": "GPT-4", + "llama": "LLaMA", "mistral-7b": "Mistral 7B", - "gemini": "Gemini", - "phi-2": "Phi-2", "mixtral": "Mixtral", - "code-llama": "Code Llama", + "olmo": "OLMo", + "phi-2": "Phi-2", "collection": "LLM Collection" } \ No newline at end of file diff --git a/pages/models/_meta.es.json b/pages/models/_meta.es.json index 46c4efa..d8d9cd0 100644 --- a/pages/models/_meta.es.json +++ b/pages/models/_meta.es.json @@ -8,5 +8,6 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Listado de LLMs" } diff --git a/pages/models/_meta.fi.json b/pages/models/_meta.fi.json index 8dec8bf..1d8a957 100644 --- a/pages/models/_meta.fi.json +++ b/pages/models/_meta.fi.json @@ -8,6 +8,7 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Model Collection" } \ No newline at end of file diff --git a/pages/models/_meta.fr.json b/pages/models/_meta.fr.json index 761fecf..bb6fd9c 100644 --- a/pages/models/_meta.fr.json +++ b/pages/models/_meta.fr.json @@ -8,6 +8,7 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Collection de modèles" } \ No newline at end of file diff --git a/pages/models/_meta.it.json b/pages/models/_meta.it.json index 0a594e4..55e7977 100644 --- a/pages/models/_meta.it.json +++ b/pages/models/_meta.it.json @@ -8,6 +8,7 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Collezione di Modelli" } diff --git a/pages/models/_meta.jp.json
b/pages/models/_meta.jp.json index 8dec8bf..1d8a957 100644 --- a/pages/models/_meta.jp.json +++ b/pages/models/_meta.jp.json @@ -8,6 +8,7 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Model Collection" } \ No newline at end of file diff --git a/pages/models/_meta.kr.json b/pages/models/_meta.kr.json index e1fd08c..827040e 100644 --- a/pages/models/_meta.kr.json +++ b/pages/models/_meta.kr.json @@ -8,5 +8,6 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Model Collection" } diff --git a/pages/models/_meta.pt.json b/pages/models/_meta.pt.json index 8dec8bf..1d8a957 100644 --- a/pages/models/_meta.pt.json +++ b/pages/models/_meta.pt.json @@ -8,6 +8,7 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Model Collection" } \ No newline at end of file diff --git a/pages/models/_meta.ru.json b/pages/models/_meta.ru.json index 23f74b9..45bd1ad 100644 --- a/pages/models/_meta.ru.json +++ b/pages/models/_meta.ru.json @@ -8,6 +8,7 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Коллекция LLM" } \ No newline at end of file diff --git a/pages/models/_meta.tr.json b/pages/models/_meta.tr.json index 3d13dd5..e0256a4 100644 --- a/pages/models/_meta.tr.json +++ b/pages/models/_meta.tr.json @@ -8,6 +8,7 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "LLM Koleksiyonu" } \ No newline at end of file diff --git a/pages/models/_meta.zh.json b/pages/models/_meta.zh.json index ad28acf..92c2d7c 100644 --- a/pages/models/_meta.zh.json +++ b/pages/models/_meta.zh.json @@ -8,6 +8,7 @@ "phi-2": "Phi-2", "mixtral": "Mixtral", "code-llama": "Code Llama", + "olmo": "OLMo", "collection": "Model Collection" } diff --git a/pages/models/olmo.ca.mdx b/pages/models/olmo.ca.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.ca.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.de.mdx b/pages/models/olmo.de.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.de.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.en.mdx b/pages/models/olmo.en.mdx new file mode 100644 index 0000000..e789174 --- /dev/null +++ b/pages/models/olmo.en.mdx @@ -0,0 +1,62 @@ +# OLMo + +In this guide, we provide an overview of the Open Language Model (OLMo), including prompts and usage examples. The guide also includes tips, applications, limitations, papers, and additional reading materials related to OLMo. + + +## Introduction to OLMo + +The Allen Institute for AI has [released](https://blog.allenai.org/olmo-open-language-model-87ccfc95f580) a new open language model and framework called OLMo. This effort is meant to provide full access to data, training code, models, and evaluation code so as to collectively accelerate the study of language models. + +Their first release includes four variants at the 7B parameter scale and one model at the 1B scale, all trained on at least 2T tokens. This marks the first of many planned releases, which include an upcoming 65B OLMo model.
+ +!["OLMo Models"](../../img/olmo/olmo-models.png) + +The releases includes: + +- full training data, including the [code](https://github.com/allenai/dolma) that produces the data +- full models weights, [training code](https://github.com/allenai/OLMo), logs, metrics, and inference code +- several checkpoints per model +- [evaluation code](https://github.com/allenai/OLMo-Eval) +- fine-tuning code + +All the code, weights, and intermediate checkpoints are released under the [Apache 2.0 License](https://github.com/allenai/OLMo#Apache-2.0-1-ov-file). + +## OLMo-7B + +Both the OLMo-7B and OLMo-1B models adopt a decoder-only transformer architecture. It follows improvements from other models like PaLM and Llama: + +- no biases +- a non-parametric layer norm +- SwiGLU activation function +- Rotary positional embeddings (RoPE) +- a vocabulary of 50,280 + +## Dolma Dataset + +This release also includes the release a pre-training dataset called [Dolma](https://github.com/allenai/dolma) -- a diverse, multi-source corpus of 3 trillion token across 5B documents acquired from 7 different data sources. The creation of Dolma involves steps like language filtering, quality filtering, content filtering, deduplication, multi-source mixing, and tokenization. + +!["Dolma Dataset"](../../img/olmo/dolma-dataset.png) + +The training dataset includes a 2T-token sample from Dolma. The tokens are concatenated together after appending a special `EOS` token to the end of each document. The training instances include groups of consecutive chunks of 2048 tokens, which are also shuffled. + +More training details and hardware specifications to train the models can be found in the paper. + + +## Results + +The models are evaluated on downstream tasks using the [Catwalk](https://github.com/allenai/catwalk). The OLMo models are compared to other several publicly available models like Falcon and Llama 2. Specifically, the model is evaluated on a set of tasks that aim to measure the model's commonsense reasoning abilities. The downstream evaluation suite includes datasets like `piqa` and `hellaswag`. The authors perform zero-shot evaluation using rank classification (i.e., completions are ranked by likelihood) and accuracy is reported. OLMo-7B outperforms all other models on 2 end-tasks and remains top-3 on 8/9 end-tasks. See a summary of the results in the chart below. + +!["OLMo Results"](../../img/olmo/olmo-results.png) + +## Prompting Guide for OLMo + +Coming soon... + +--- + +Figures source: [OLMo: Accelerating the Science of Language Models](https://allenai.org/olmo/olmo-paper.pdf) + +## References + +- [OLMo: Open Language Model](https://blog.allenai.org/olmo-open-language-model-87ccfc95f580) +- [OLMo: Accelerating the Science of Language Models](https://allenai.org/olmo/olmo-paper.pdf) \ No newline at end of file diff --git a/pages/models/olmo.es.mdx b/pages/models/olmo.es.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.es.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.fi.mdx b/pages/models/olmo.fi.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.fi.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. 
\ No newline at end of file diff --git a/pages/models/olmo.fr.mdx b/pages/models/olmo.fr.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.fr.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.it.mdx b/pages/models/olmo.it.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.it.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.jp.mdx b/pages/models/olmo.jp.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.jp.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.kr.mdx b/pages/models/olmo.kr.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.kr.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.pt.mdx b/pages/models/olmo.pt.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.pt.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.ru.mdx b/pages/models/olmo.ru.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.ru.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.tr.mdx b/pages/models/olmo.tr.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.tr.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/models/olmo.zh.mdx b/pages/models/olmo.zh.mdx new file mode 100644 index 0000000..a886a91 --- /dev/null +++ b/pages/models/olmo.zh.mdx @@ -0,0 +1,3 @@ +# OLMo + +This page needs a translation! Feel free to contribute a translation by clicking the `Edit this page` button on the right side. \ No newline at end of file diff --git a/pages/papers.en.mdx b/pages/papers.en.mdx index 3310026..4addb0d 100644 --- a/pages/papers.en.mdx +++ b/pages/papers.en.mdx @@ -4,6 +4,7 @@ The following are the latest papers (sorted by release date) on prompt engineeri ## Overviews +- [A Survey on Hallucination in Large Language Models: Principles, Taxonomy, Challenges, and Open Questions](https://arxiv.org/abs/2311.05232) (November 2023) - [An RL Perspective on RLHF, Prompting, and Beyond](https://arxiv.org/abs/2310.06147) (October 2023) - [Few-shot Fine-tuning vs. In-context Learning: A Fair Comparison and Evaluation](https://arxiv.org/abs/2305.16938) (May 2023) - [Jailbreaking ChatGPT via Prompt Engineering: An Empirical Study](https://arxiv.org/abs/2305.13860) (May 2023)