Merge pull request #965 from siiddhantt/feature/set-tokens-message-history

feat: dropdown to adjust conversational history limits
2024-11-19 21:25:39 +00:00 · 2024-05-28 09:43:21 +01:00 · 2024-05-28 09:43:21 +01:00 · 2d12a3cd7a
commit 2d12a3cd7a
parent f6c66f6ee4 220d137e66
11 changed files with 152 additions and 27 deletions
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@ -95,6 +95,7 @@ def get_vectorstore(data):
    vectorstore = os.path.join("application", vectorstore)
    return vectorstore

+
 def is_azure_configured():
    return (
        settings.OPENAI_API_BASE
@ -221,7 +222,10 @@ def stream():
        chunks = int(data["chunks"])
    else:
        chunks = 2
-
+    if "token_limit" in data:
+        token_limit = int(data["token_limit"])  # cast like `chunks`; a str would break the `<` clamp in the retriever
+    else:
+        token_limit = settings.DEFAULT_MAX_HISTORY

    # check if active_docs or api_key is set

@ -255,6 +259,7 @@ def stream():
        chat_history=history,
        prompt=prompt,
        chunks=chunks,
+        token_limit=token_limit,
        gpt_model=gpt_model,
        user_api_key=user_api_key,
    )
@ -291,6 +296,10 @@ def api_answer():
        chunks = int(data["chunks"])
    else:
        chunks = 2
+    if "token_limit" in data:
+        token_limit = int(data["token_limit"])  # cast like `chunks`; a str would break the `<` clamp in the retriever
+    else:
+        token_limit = settings.DEFAULT_MAX_HISTORY

    # use try and except  to check for exception
    try:
@ -322,6 +331,7 @@ def api_answer():
            chat_history=history,
            prompt=prompt,
            chunks=chunks,
+            token_limit=token_limit,
            gpt_model=gpt_model,
            user_api_key=user_api_key,
        )
@ -371,7 +381,6 @@ def api_search():
        source = {}
        user_api_key = None

-
    if (
        source["active_docs"].split("/")[0] == "default"
        or source["active_docs"].split("/")[0] == "local"
@ -379,6 +388,10 @@ def api_search():
        retriever_name = "classic"
    else:
        retriever_name = source["active_docs"]
+    if "token_limit" in data:
+        token_limit = int(data["token_limit"])  # cast like `chunks`; a str would break the `<` clamp in the retriever
+    else:
+        token_limit = settings.DEFAULT_MAX_HISTORY

    retriever = RetrieverCreator.create_retriever(
        retriever_name,
@ -387,6 +400,7 @@ def api_search():
        chat_history=[],
        prompt="default",
        chunks=chunks,
+        token_limit=token_limit,
        gpt_model=gpt_model,
        user_api_key=user_api_key,
    )
--- a/application/core/settings.py
+++ b/application/core/settings.py
@ -15,7 +15,8 @@ class Settings(BaseSettings):
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
    MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
-    TOKENS_MAX_HISTORY: int = 150
+    DEFAULT_MAX_HISTORY: int = 150
+    MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 100000}  # per-model history caps, whole token counts (ints)
    UPLOAD_FOLDER: str = "inputs"
    VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch" or "qdrant"
    RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
--- a/application/retriever/brave_search.py
+++ b/application/retriever/brave_search.py
@ -15,6 +15,7 @@ class BraveRetSearch(BaseRetriever):
        chat_history,
        prompt,
        chunks=2,
+        token_limit=150,
        gpt_model="docsgpt",
        user_api_key=None,
    ):
@ -24,6 +25,16 @@ class BraveRetSearch(BaseRetriever):
        self.prompt = prompt
        self.chunks = chunks
        self.gpt_model = gpt_model
+        self.token_limit = (
+            token_limit
+            if token_limit
+            < settings.MODEL_TOKEN_LIMITS.get(
+                self.gpt_model, settings.DEFAULT_MAX_HISTORY
+            )
+            else settings.MODEL_TOKEN_LIMITS.get(
+                self.gpt_model, settings.DEFAULT_MAX_HISTORY
+            )
+        )
        self.user_api_key = user_api_key

    def _get_data(self):
@ -70,10 +81,7 @@ class BraveRetSearch(BaseRetriever):
                    tokens_batch = count_tokens(i["prompt"]) + count_tokens(
                        i["response"]
                    )
-                    if (
-                        tokens_current_history + tokens_batch
-                        < settings.TOKENS_MAX_HISTORY
-                    ):
+                    if tokens_current_history + tokens_batch < self.token_limit:
                        tokens_current_history += tokens_batch
                        messages_combine.append(
                            {"role": "user", "content": i["prompt"]}
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@ -16,6 +16,7 @@ class ClassicRAG(BaseRetriever):
        chat_history,
        prompt,
        chunks=2,
+        token_limit=150,
        gpt_model="docsgpt",
        user_api_key=None,
    ):
@ -25,6 +26,16 @@ class ClassicRAG(BaseRetriever):
        self.prompt = prompt
        self.chunks = chunks
        self.gpt_model = gpt_model
+        self.token_limit = (
+            token_limit
+            if token_limit
+            < settings.MODEL_TOKEN_LIMITS.get(
+                self.gpt_model, settings.DEFAULT_MAX_HISTORY
+            )
+            else settings.MODEL_TOKEN_LIMITS.get(
+                self.gpt_model, settings.DEFAULT_MAX_HISTORY
+            )
+        )
        self.user_api_key = user_api_key

    def _get_vectorstore(self, source):
@ -85,10 +96,7 @@ class ClassicRAG(BaseRetriever):
                    tokens_batch = count_tokens(i["prompt"]) + count_tokens(
                        i["response"]
                    )
-                    if (
-                        tokens_current_history + tokens_batch
-                        < settings.TOKENS_MAX_HISTORY
-                    ):
+                    if tokens_current_history + tokens_batch < self.token_limit:
                        tokens_current_history += tokens_batch
                        messages_combine.append(
                            {"role": "user", "content": i["prompt"]}
--- a/application/retriever/duckduck_search.py
+++ b/application/retriever/duckduck_search.py
@ -15,6 +15,7 @@ class DuckDuckSearch(BaseRetriever):
        chat_history,
        prompt,
        chunks=2,
+        token_limit=150,
        gpt_model="docsgpt",
        user_api_key=None,
    ):
@ -24,6 +25,16 @@ class DuckDuckSearch(BaseRetriever):
        self.prompt = prompt
        self.chunks = chunks
        self.gpt_model = gpt_model
+        self.token_limit = (
+            token_limit
+            if token_limit
+            < settings.MODEL_TOKEN_LIMITS.get(
+                self.gpt_model, settings.DEFAULT_MAX_HISTORY
+            )
+            else settings.MODEL_TOKEN_LIMITS.get(
+                self.gpt_model, settings.DEFAULT_MAX_HISTORY
+            )
+        )
        self.user_api_key = user_api_key

    def _parse_lang_string(self, input_string):
@ -87,10 +98,7 @@ class DuckDuckSearch(BaseRetriever):
                    tokens_batch = count_tokens(i["prompt"]) + count_tokens(
                        i["response"]
                    )
-                    if (
-                        tokens_current_history + tokens_batch
-                        < settings.TOKENS_MAX_HISTORY
-                    ):
+                    if tokens_current_history + tokens_batch < self.token_limit:
                        tokens_current_history += tokens_batch
                        messages_combine.append(
                            {"role": "user", "content": i["prompt"]}
--- a/frontend/src/components/Dropdown.tsx
+++ b/frontend/src/components/Dropdown.tsx
@ -20,12 +20,18 @@ function Dropdown({
  options:
    | string[]
    | { name: string; id: string; type: string }[]
-    | { label: string; value: string }[];
-  selectedValue: string | { label: string; value: string } | null;
+    | { label: string; value: string }[]
+    | { value: number; description: string }[];
+  selectedValue:
+    | string
+    | { label: string; value: string }
+    | { value: number; description: string }
+    | null;
  onSelect:
    | ((value: string) => void)
    | ((value: { name: string; id: string; type: string }) => void)
-    | ((value: { label: string; value: string }) => void);
+    | ((value: { label: string; value: string }) => void)
+    | ((value: { value: number; description: string }) => void);
  size?: string;
  rounded?: 'xl' | '3xl';
  border?: 'border' | 'border-2';
@ -64,8 +70,14 @@ function Dropdown({
              !selectedValue && 'text-silver dark:text-gray-400'
            }`}
          >
-            {selectedValue
+            {selectedValue && 'label' in selectedValue
              ? selectedValue.label
+              : selectedValue && 'description' in selectedValue
+              ? `${
+                  selectedValue.value < 1e9
+                    ? selectedValue.value + ` (${selectedValue.description})`
+                    : selectedValue.description
+                }`
              : placeholder
              ? placeholder
              : 'From URL'}
@ -99,7 +111,13 @@ function Dropdown({
                  ? option
                  : option.name
                  ? option.name
-                  : option.label}
+                  : option.label
+                  ? option.label
+                  : `${
+                      option.value < 1e9
+                        ? option.value + ` (${option.description})`
+                        : option.description
+                    }`}
              </span>
              {showEdit && onEdit && (
                <img
--- a/frontend/src/conversation/conversationApi.ts
+++ b/frontend/src/conversation/conversationApi.ts
@ -1,5 +1,6 @@
 import { Answer, FEEDBACK } from './conversationModels';
 import { Doc } from '../preferences/preferenceApi';
+import { selectTokenLimit } from '../preferences/preferenceSlice';

 const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';

@ -38,6 +39,7 @@ export function fetchAnswerApi(
  conversationId: string | null,
  promptId: string | null,
  chunks: string,
+  token_limit: number,
 ): Promise<
  | {
      result: any;
@ -73,6 +75,7 @@ export function fetchAnswerApi(
      conversation_id: conversationId,
      prompt_id: promptId,
      chunks: chunks,
+      token_limit: token_limit,
    }),
    signal,
  })
@ -103,6 +106,7 @@ export function fetchAnswerSteaming(
  conversationId: string | null,
  promptId: string | null,
  chunks: string,
+  token_limit: number,
  onEvent: (event: MessageEvent) => void,
 ): Promise<Answer> {
  const docPath = getDocPath(selectedDocs);
@ -119,6 +123,7 @@ export function fetchAnswerSteaming(
      conversation_id: conversationId,
      prompt_id: promptId,
      chunks: chunks,
+      token_limit: token_limit,
    };
    fetch(apiHost + '/stream', {
      method: 'POST',
@ -181,6 +186,7 @@ export function searchEndpoint(
  conversation_id: string | null,
  history: Array<any> = [],
  chunks: string,
+  token_limit: number,
 ) {
  const docPath = getDocPath(selectedDocs);

@ -190,6 +196,7 @@ export function searchEndpoint(
    conversation_id,
    history,
    chunks: chunks,
+    token_limit: token_limit,
  };
  return fetch(`${apiHost}/api/search`, {
    method: 'POST',
--- a/frontend/src/conversation/conversationSlice.ts
+++ b/frontend/src/conversation/conversationSlice.ts
@ -28,6 +28,7 @@ export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
          state.conversation.conversationId,
          state.preference.prompt.id,
          state.preference.chunks,
+          state.preference.token_limit,

          (event) => {
            const data = JSON.parse(event.data);
@ -51,6 +52,7 @@ export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
                state.conversation.conversationId,
                state.conversation.queries,
                state.preference.chunks,
+                state.preference.token_limit,
              ).then((sources) => {
                //dispatch streaming sources
                dispatch(
@ -86,6 +88,7 @@ export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
          state.conversation.conversationId,
          state.preference.prompt.id,
          state.preference.chunks,
+          state.preference.token_limit,
        );
        if (answer) {
          let sourcesPrepped = [];
--- a/frontend/src/preferences/preferenceSlice.ts
+++ b/frontend/src/preferences/preferenceSlice.ts
@ -11,8 +11,9 @@ import { ActiveState } from '../models/misc';
 interface Preference {
  apiKey: string;
  prompt: { name: string; id: string; type: string };
-  selectedDocs: Doc | null;
  chunks: string;
+  token_limit: number;
+  selectedDocs: Doc | null;
  sourceDocs: Doc[] | null;
  conversations: { name: string; id: string }[] | null;
  modalState: ActiveState;
@ -22,6 +23,7 @@ const initialState: Preference = {
  apiKey: 'xxx',
  prompt: { name: 'default', id: 'default', type: 'public' },
  chunks: '2',
+  token_limit: 2000,
  selectedDocs: {
    name: 'default',
    language: 'default',
@ -60,6 +62,9 @@ export const prefSlice = createSlice({
    setChunks: (state, action) => {
      state.chunks = action.payload;
    },
+    setTokenLimit: (state, action) => {
+      state.token_limit = action.payload;
+    },
    setModalStateDeleteConv: (state, action: PayloadAction<ActiveState>) => {
      state.modalState = action.payload;
    },
@ -73,6 +78,7 @@ export const {
  setConversations,
  setPrompt,
  setChunks,
+  setTokenLimit,
  setModalStateDeleteConv,
 } = prefSlice.actions;
 export default prefSlice.reducer;
@ -115,6 +121,18 @@ prefListenerMiddleware.startListening({
  },
 });

+prefListenerMiddleware.startListening({
+  matcher: isAnyOf(setTokenLimit),
+  effect: (action, listenerApi) => {
+    localStorage.setItem(
+      'DocsGPTTokenLimit',
+      JSON.stringify(
+        (listenerApi.getState() as RootState).preference.token_limit,
+      ),
+    );
+  },
+});
+
 export const selectApiKey = (state: RootState) => state.preference.apiKey;
 export const selectApiKeyStatus = (state: RootState) =>
  !!state.preference.apiKey;
@ -132,3 +150,5 @@ export const selectConversationId = (state: RootState) =>
  state.conversation.conversationId;
 export const selectPrompt = (state: RootState) => state.preference.prompt;
 export const selectChunks = (state: RootState) => state.preference.chunks;
+export const selectTokenLimit = (state: RootState) =>
+  state.preference.token_limit;
--- a/frontend/src/settings/General.tsx
+++ b/frontend/src/settings/General.tsx
@ -8,6 +8,8 @@ import {
  setPrompt,
  setChunks,
  selectChunks,
+  setTokenLimit,
+  selectTokenLimit,
  setModalStateDeleteConv,
 } from '../preferences/preferenceSlice';

@ -17,10 +19,19 @@ const General: React.FC = () => {
  const themes = ['Light', 'Dark'];
  const languages = ['English'];
  const chunks = ['0', '2', '4', '6', '8', '10'];
+  const token_limits = new Map([
+    [0, 'None'],
+    [100, 'Low'],
+    [1000, 'Medium'],
+    [2000, 'Default'],
+    [4000, 'High'],
+    [1e9, 'Unlimited'],
+  ]);
  const [prompts, setPrompts] = React.useState<
    { name: string; id: string; type: string }[]
  >([]);
  const selectedChunks = useSelector(selectChunks);
+  const selectedTokenLimit = useSelector(selectTokenLimit);
  const [isDarkTheme, toggleTheme] = useDarkTheme();
  const [selectedTheme, setSelectedTheme] = React.useState(
    isDarkTheme ? 'Dark' : 'Light',
@ -87,6 +98,31 @@ const General: React.FC = () => {
          border="border"
        />
      </div>
+      <div className="mb-5">
+        <p className="mb-2 font-bold text-jet dark:text-bright-gray">
+          Conversational history
+        </p>
+        <Dropdown
+          options={Array.from(token_limits, ([value, desc]) => ({
+            value: value,
+            description: desc,
+          }))}
+          selectedValue={{
+            value: selectedTokenLimit,
+            description: token_limits.get(selectedTokenLimit) as string,
+          }}
+          onSelect={({
+            value,
+            description,
+          }: {
+            value: number;
+            description: string;
+          }) => dispatch(setTokenLimit(value))}
+          size="w-56"
+          rounded="3xl"
+          border="border"
+        />
+      </div>
      <div className="mb-5">
        <Prompts
          prompts={prompts}
--- a/frontend/src/store.ts
+++ b/frontend/src/store.ts
@ -7,19 +7,21 @@ import {

 const key = localStorage.getItem('DocsGPTApiKey');
 const prompt = localStorage.getItem('DocsGPTPrompt');
-const doc = localStorage.getItem('DocsGPTRecentDocs');
 const chunks = localStorage.getItem('DocsGPTChunks');
+const token_limit = localStorage.getItem('DocsGPTTokenLimit');
+const doc = localStorage.getItem('DocsGPTRecentDocs');

 const store = configureStore({
  preloadedState: {
    preference: {
      apiKey: key ?? '',
-      chunks: JSON.parse(chunks ?? '2').toString(),
-      selectedDocs: doc !== null ? JSON.parse(doc) : null,
      prompt:
        prompt !== null
          ? JSON.parse(prompt)
          : { name: 'default', id: 'default', type: 'private' },
+      chunks: JSON.parse(chunks ?? '2').toString(),
+      token_limit: token_limit ? parseInt(token_limit) : 2000,
+      selectedDocs: doc !== null ? JSON.parse(doc) : null,
      conversations: null,
      sourceDocs: [
        {