Compare commits

...

156 Commits

Author SHA1 Message Date
Danny Avila
49753a35e5 v0.7.0 (#2273) 2024-04-01 19:24:01 -04:00
Danny Avila
1605ef3793 🐳 hotfix: Tag Images Workflow Update (#2272) 2024-04-01 19:04:37 -04:00
Danny Avila
8b3f80fe24 🐳 hotfix: Necessary Dockerfile Update (#2271)
* chore: remove version comment from pre-commit shell script

* chore: Dockerfile update
2024-04-01 18:46:12 -04:00
bsu3338
038063d4d1 🐞Fix: Stable Diffusion User Directory (#2270) 2024-04-01 15:55:44 -04:00
Danny Avila
5c8b16fbaf v0.7.0 (#2266)
*  v0.7.0

* chore: gitignore

* 🐳 ci: update release image workflows
2024-04-01 15:48:57 -04:00
Danny Avila
aff219c655 📋 fix: Ensure Textarea Resizes in Clipboard Edge Case (#2268)
* chore: ts-ignore fake conversation data used for testing

* chore(useTextarea): import helper functions to declutter hook

* fix(Textarea): reset textarea value explicitly by resetting `textAreaRef.current.value`
2024-04-01 13:40:21 -04:00
Danny Avila
d07396d308 🐞 fix: Handle Empty Model Error in Assistants Form (#2265) 2024-04-01 09:20:11 -04:00
pxz2016
cc92597f14 🐞 fix: Handle Garbled Chinese Characters in File Upload (#2261)
Co-authored-by: 彭修照 <pengxiuzhao.uh@haier.com>
2024-04-01 08:25:36 -04:00
Danny Avila
4854b39f41 🚀 feat: Add CLI Helper Scripts to API Container Image (#2257) 2024-03-31 18:59:07 -04:00
Danny Avila
bb8a40dd98 🎨 fix: Optimize StableDiffusion API Tool and Fix for Assistants Usage (#2253)
* chore: update docs

* fix(StableDiffusion): optimize API responses and file handling, return expected metadata for Assistants endpoint
2024-03-30 20:09:59 -04:00
Danny Avila
56ea0f9ae7 🐳 feat: RAG for Default Docker Compose Files + Docs Update (#2246)
* refactor(deploy-compose.yml): use long-syntax to avoid implicit folder creation of librechat.yaml

* refactor(docker-compose.override.yml.example): use long-syntax to avoid implicit folder creation of librechat.yaml

* chore: add simple health check for RAG_API_URL

* chore: improve axios error handling, adding `logAxiosError` (sketched after this entry)

* chore: more informative message detailing RAG_API_URL path

* feat: add rag_api and vectordb to default compose file

* chore(rag.yml): update standalone rag compose file to use RAG_PORT

* chore: documentation updates

* docs: Update rag_api.md with images

* Update rag_api.md

* Update rag_api.md, assistants clarification

* add RAG API note to breaking changes
2024-03-29 21:15:36 -04:00
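The `logAxiosError` helper referenced in the entry above is not shown in this compare view. Below is a minimal sketch of what such a helper might look like; the function signature, the `~/config` logger import, and the message format are assumptions, not LibreChat's actual implementation.

```js
// Hypothetical sketch of an axios error logger; the real `logAxiosError`
// may differ in signature and output.
const { logger } = require('~/config'); // assumed logger location

function logAxiosError({ message, error }) {
  if (error.response) {
    // The server replied with a non-2xx status code
    logger.error(`${message} [status: ${error.response.status}]`, error.response.data);
  } else if (error.request) {
    // The request was sent but no response was received (network error, timeout)
    logger.error(`${message} [no response received]`);
  } else {
    logger.error(`${message} [${error.message}]`);
  }
}
```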
Danny Avila
6a6b2e79b0 🔧 fix: Improve Assistants File Citation & Download Handling (#2248)
* fix(processMessages): properly handle assistant file citations and add sources list

* feat: improve file download UX by making any downloaded files accessible within the app post-download

* refactor(processOpenAIImageOutput): correctly handle two different outputs for images, since OpenAI generates a file in their storage and shares the filepath for image rendering

* refactor: create `addFileToCache` helper to use across frontend

* refactor: add ImageFile parts to cache on processing content stream
2024-03-29 19:09:16 -04:00
Danny Avila
bc2a628902 🌍 fix(Translations): Map Partial langCode and Add Unit Tests (#2240) 2024-03-29 12:17:07 -04:00
Danny Avila
dec7879cc1 refactor(loadConfigModels): Stricter Default Model Fallback (#2239)
* chore: add TEndpoint type/typedef

* refactor(loadConfigModels.spec): stricter default model matching (fails with current impl.)

* refactor(loadConfigModels): return default models on endpoint basis and not fetch basis

* refactor: rename `uniqueKeyToNameMap` to `uniqueKeyToEndpointsMap` for clarity
2024-03-29 11:49:38 -04:00
Danny Avila
0a8118deed 🗨️ fix(useSSE): Prevent 'New Chat' Title after Regenerating Initial Message (#2238) 2024-03-29 10:56:51 -04:00
Raí Santos
59a8165379 🌍 : Updated & Added new Portuguese and Spanish Translations (#2228)
* 🌍 : Updated & Added new Portuguese and Spanish Translations

* fix: \' to "

* fix(Br.tsx): revert Snyk placeholders

* fix(Es.tsx): revert Snyk placeholders

---------

Co-authored-by: Danny Avila <danacordially@gmail.com>
2024-03-29 10:50:09 -04:00
Danny Avila
3a1d07136c refactor(loadConfigModels): Fallback to Default Models if Fetch Fails (#2236) 2024-03-29 10:43:36 -04:00
Danny Avila
a00756c469 ⬇️ feat: Assistant File Downloads (#2234)
* WIP: basic route for file downloads and file strategy for generating a ReadableStream to pipe as the response

* chore(DALLE3): add typing for OpenAI client

* chore: add `CONSOLE_JSON` notes to dotenv.md

* WIP: first pass OpenAI Assistants File Output handling

* feat: first pass assistants output file download from openai

* chore: add yml vs. yaml variation to .gitignore for `librechat.yml`

* refactor(retrieveAndProcessFile): remove redundancies

* fix(syncMessages): explicit sort of apiMessages to fix message order on abort

* chore: add logs for warnings and errors, show toast on frontend

* chore: add logger where console was still being used
2024-03-29 08:23:38 -04:00
Fuegovic
7945fea0f9 ✏️ doc update: dotenv.md (#2226) 2024-03-27 16:29:40 -04:00
Ivan Dachev
84656b9812 💽 feat: Add Script for User Stats (#2224) 2024-03-27 14:41:29 -04:00
Fuegovic
b5d25f5e4f 🔎 chore: bump meilisearch v1.7 / v0.38.0 (#2175)
* 🔎 chore: bump meilisearch v1.7 / v0.38.0

* ✏️ breaking_changes.md
2024-03-27 10:08:20 -04:00
Ivan Dachev
d4b0af3dba 💽 feat: Add CONSOLE_JSON for deploying to GCP K8S env (#2146)
* Add CONSOLE_JSON (sketched after this entry)

* Update example env

* Moved to utils
2024-03-27 10:07:04 -04:00
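`CONSOLE_JSON` switches console output to structured JSON, which log collectors on GCP/AWS can parse natively. A hedged sketch of the idea using winston; the actual wiring of LibreChat's logger config may differ.

```js
// Sketch: toggle between JSON logs (for cloud log collectors) and
// human-readable logs, driven by the CONSOLE_JSON env var.
const { createLogger, transports, format } = require('winston');

const useJson = process.env.CONSOLE_JSON === 'true';

const logger = createLogger({
  transports: [
    new transports.Console({
      format: useJson ? format.json() : format.simple(),
    }),
  ],
});

logger.info('server started'); // emitted as a JSON object or plain text, per the flag
```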
suzuki.sh
57d1f12574 🔗 docs: Fix Link to Code of Conduct (#2206)
Fix link to Code of Conduct
2024-03-26 19:29:22 -04:00
Danny Avila
182c9f7080 🔍 chore: Clean Up Documentation Pt. 4 (#2220) 2024-03-26 14:02:22 -04:00
Danny Avila
5df0ec06ea 🔍 chore: Clean Up Documentation Part 3 (#2219) 2024-03-26 13:57:25 -04:00
Danny Avila
ea54cf03e9 🔍 chore: Clean Up Documentation Part 2 (#2218) 2024-03-26 13:48:20 -04:00
Danny Avila
7f83a060a0 🔍 chore: Clean Up Documentation (#2217)
* fix(initializeClient.spec.js): remove condition failing test on local installations

* docs: remove comments and invalid HTML as required by the embeddings generator, and add new documentation guidelines
2024-03-26 13:40:00 -04:00
Danny Avila
2259bf8b03 🚀 feat: Add GitHub Actions Workflow for Generating Docs Embeddings (#2216) 2024-03-26 11:57:04 -04:00
Danny Avila
5c3c28009f 🧹 chore: Update Docker Docs & Make cache field Optional for Custom Config (#2211)
* docs: updating docker

* fix(customConfig): make `cache` field optional as intended (though not recommended for local setups)
2024-03-26 05:45:20 -04:00
Danny Avila
f55bd3d0e9 🎨 style: Ensure Side Panel state Remains on Refresh (#2210) 2024-03-26 05:21:40 -04:00
Danny Avila
718572b7c8 🎨 style: Refine SidePanel and Textarea Styling (#2209)
* experimental: use TextareaAutosize wrapper with useLayoutEffect to hopefully fix random textarea jankiness

* fix(Textarea): force a resize when placeholder text changes

* style(ScrollToBottom): update styling for scroll button

* style: memoize values and improve side panel toggle states

* refactor(SidePanel): more control for toggle states, new hide panel button, and improve toggle state logic

* chore: hide resizable panel handle on smaller screens
2024-03-26 04:19:51 -04:00
Florian Kohrt
cb62847838 📖 docs: Add details for Azure OpenAI Assistants (#2173)
The default `.env` contains the line `ASSISTANTS_API_KEY=user_provided`. When pre-configuring Azure OpenAI models, this setting makes it impossible to use assistants due to a missing user-provided key. Only by commenting out that line does the Azure setup work.
2024-03-25 18:27:36 -04:00
Danny Avila
3ef46132eb 🐞 fix(client): Prevent Async Reset of Latest Message (#2203)
* refactor: use debug statement for runStepCompleted message

* fix(ChatRoute): prevent use of `newConversation` from resetting `latestMessage`, which would fire asynchronously and finalize after `latestMessage` was already correctly set
2024-03-25 11:16:18 -04:00
Danny Avila
8fc52348e8 🌟 fix: Handle Assistants Edge Cases, Improve Filter Styling (#2201)
* fix(assistants): default query to limit of 100 and `desc` order

* refactor(useMultiSearch): use object as params and fix styling for assistants

* feat: informative message for thread initialization failing due to long message
2024-03-25 08:55:33 -04:00
Fuegovic
a4f4ec85f8 🧑‍💻docs: Update General Docs and Contribution Guidelines (#2194)
* doc update: documentation_guidelines.md

* doc update: how_to_contribute.md

* doc update: testing.md / how_to_contribute.md

* doc update: translation_contribution.md/testing.md/how_to_contribute.md

* doc update: coding_conventions.md

* fix formatting: how_to_contribute.md

* fix formatting (again): how_to_contribute.md
2024-03-25 07:26:43 -04:00
Danny Avila
f86d80de59 🔧 fix(assistants): Vision minor fix & Add Docs (#2196)
* 👓 fix(assistants): Only Retrieve Assistant Data for Vision Requests if attachments exist in Host Storage

* docs: add capability
2024-03-25 00:02:54 -04:00
Danny Avila
798e8763d0 👓 feat: Vision Support for Assistants (#2195)
* refactor(assistants/chat): use promises to speed up initialization, initialize shared variables, include `attachedFileIds` to streamRunManager

* chore: additional typedefs

* fix(OpenAIClient): handle edge case where attachments promise is resolved

* feat: createVisionPrompt

* feat: Vision Support for Assistants
2024-03-24 23:43:00 -04:00
Danny Avila
1f0fb497f8 🎉 feat: Optimizations and Anthropic Title Generation (#2184)
* feat: add claude-3-haiku-20240307 to default anthropic list

* refactor: optimize `saveMessage` calls mid-stream via throttling (see the sketch after this entry)

* chore: remove addMetadata operations and consolidate in BaseClient

* fix(listAssistantsForAzure): attempt to specify correct model mapping as accurately as possible (#2177)

* refactor(client): update last conversation setup with current assistant model, call newConvo again when assistants load to allow fast initial load and ensure assistant model is always the default, not the last selected model

* refactor(cache): explicitly add TTL of 2 minutes when setting titleCache and add default TTL of 10 minutes to abortKeys cache

* feat(AnthropicClient): conversation titling using Anthropic Function Calling

* chore: remove extraneous token usage logging

* fix(convos): unhandled edge case for conversation grouping (undefined conversation)

* style: Improved style of Search Bar after recent UI update

* chore: remove unused code, content part helpers

* feat: always show code option
2024-03-23 20:21:40 -04:00
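Throttling the mid-stream `saveMessage` calls means partial message text is persisted at most once per interval rather than on every streamed token. A minimal sketch of the pattern, assuming a lodash-style `throttle`; the interval and call shape are illustrative, not the project's actual values.

```js
// Sketch: wrap saveMessage so rapid streaming updates collapse into
// periodic writes. `leading` persists the first chunk immediately and
// `trailing` persists the last state of each window.
const throttle = require('lodash/throttle');

function createThrottledSave(saveMessage, intervalMs = 3000) {
  return throttle((message) => saveMessage(message), intervalMs, {
    leading: true,
    trailing: true,
  });
}
```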
Florian Kohrt
8e7816468d 📚 docs: Fix Broken Links (#2171)
Fix broken links to the custom config file on `timeoutMs` and `supportedIds`.
2024-03-23 11:05:52 -04:00
Danny Avila
45a95acec2 📂 feat: RAG Improvements (#2169)
* feat: new vector file processing strategy

* chore: remove unused client files

* chore: remove more unused client files

* chore: remove more unused client files and move used to new dir

* chore(DataIcon): add className

* WIP: Model Endpoint Settings Update, draft additional context settings

* feat: improve parsing for augmented prompt, add full context option

* chore: remove volume mounting from rag.yml as no longer necessary
2024-03-22 19:07:08 -04:00
Danny Avila
f427ad792a 🚀 feat: Assistants Streaming (#2159)
* chore: bump openai to 4.29.0 and npm audit fix

* chore: remove unnecessary stream field from ContentData

* feat: new enum and types for AssistantStreamEvent

* refactor(AssistantService): remove stream field and add conversationId to text ContentData
> - return `finalMessage` and `text` on run completion
> - move `processMessages` to services/Threads to avoid circular dependencies with new stream handling
> - refactor(processMessages/retrieveAndProcessFile): add new `client` field to differentiate new RunClient type

* WIP: new assistants stream handling

* chore: store messages in StreamRunManager

* chore: add additional typedefs

* fix: pass req and openai to StreamRunManager

* fix(AssistantService): pass openai as client to `retrieveAndProcessFile`

* WIP: streaming tool i/o, handle in_progress and completed run steps

* feat(assistants): process required actions with streaming enabled

* chore: condense early return check for useSSE useEffect

* chore: remove unnecessary comments and only handle completed tool calls when not function

* feat: add TTL for assistants run abort cacheKey

* feat: abort stream runs

* fix(assistants): render streaming cursor

* fix(assistants): hide edit icon as functionality is not supported

* fix(textArea): handle pasting edge cases; first, when onChange events wouldn't fire; second, when textarea wouldn't resize

* chore: memoize Conversations

* chore(useTextarea): reverse args order

* fix: load default capabilities when Azure is configured to support assistants, but the `assistants` endpoint is not configured

* fix(AssistantSelect): update form assistant model on assistant form select

* fix(actions): handle azure strict validation for function names to fix crud for actions

* chore: remove content data debug log as it fires in rapid succession

* feat: improve UX for assistant errors mid-request

* feat: add tool call localizations and replace any domain separators from azure action names

* refactor(chat): error out tool calls without outputs during handleError

* fix(ToolService): handle domain separators allowing Azure use of actions

* refactor(StreamRunManager): types and throw Error if tool submission fails
2024-03-21 22:42:25 -04:00
Hermes Trismegistus
ed64c76053 📖 docs: Update ShuttleAI Fibonacci Image (#2160) 2024-03-21 22:41:58 -04:00
Danny Avila
25a0487ce5 chore: Revision of PR #2157, move global steps earlier, execute as root 2024-03-21 12:33:30 -04:00
Danny Avila
3f77fe18b7 🐋 chore: Revision of PR #2157, move step earlier 2024-03-21 12:28:40 -04:00
Danny Avila
09de9a2b42 🐋 fix(Dockerfile): add back additional deps., handle permissions, use --no-audit flag on install (#2157) 2024-03-21 12:24:40 -04:00
Danny Avila
a673f62831 🐋 chore: Cleanup Dockerfile (#2156) 2024-03-21 11:18:53 -04:00
Danny Avila
e0dd0381b2 🌑 style(File Manager): Localize and Update Dark Mode Stylings (#2155)
* 🌑 style: Update Dark Mode Stylings for File Manager

* 🌐 feat: localize file manager text

* 🌐 feat: file panel table localization
2024-03-21 10:52:45 -04:00
Hermes Trismegistus
1ee2c32a67 🚀 feat: Add ShuttleAI as Known Endpoint (#2152)
Added new Official Known Endpoint (ShuttleAI)
2024-03-21 09:17:57 -04:00
Flynn
f521040784 🔧 fix(menu): Menu Item Filter Improvements (#2153)
* small-fix: Ensure that fake separators in model lists do not show in search

* Ensure Plugin search uses correct placeholder and key filtering in search
2024-03-21 09:15:25 -04:00
Marco Beretta
30f6d90cfe 🖌️ style: Improve Dark Theme Accessibility (#2125)
* style: all landing page components

* chore: converted all slate to gray, since slate doesn't work

* style: assistant panel

* style: basic UI components, userprovided, preset

* style: update in multiple components

* fix(PluginStoreDialog): justify-center

* fixed some minor UI styles

* style(MultiSearch): update dark bg

* style: update Convo styling

* style: lower textarea max height slightly

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
2024-03-21 09:02:00 -04:00
Walber Cardoso
e95c0aaaed 🔧 style(fix): Convo Fade Effect (#2147)
* 🔧 (fix) Convo Fade Effect

* 🔧style(fix): Convo Fade Effect (#2117)

* 🔧 style(fix): Convo Fade Effect (#2117)
2024-03-21 08:39:43 -04:00
Danny Avila
9bab595204 🔬 chore: Add Circular Dependency Check to backend-review (#2149)
* 🔬 chore: Add Circular Dependency check to `backend-review`

* chore: touch random file for workflow trigger

* chore: workflow step order

* chore: update workflow to create empty auth.json file

* fix: attempt empty auth.json creation

* chore: add test_bundle ESLint ignore pattern
2024-03-20 12:15:42 -04:00
Danny Avila
4f17d97eb2 fix(sendEmail): circular dependency 2024-03-20 11:52:05 -04:00
Danny Avila
e4ac58012f 📧 fix: Correct Handling of Self-Signed Certificates in sendEmail (#2148)
- note: Put differently, setting `rejectUnauthorized: true` means self-signed certificates are not allowed, i.e. `EMAIL_ALLOW_SELFSIGNED` is set to false (sketched after this entry)
2024-03-20 11:48:54 -04:00
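A minimal sketch of how that flag likely maps onto nodemailer's TLS options; the surrounding transport options are assumptions, not the project's exact configuration.

```js
// Sketch: EMAIL_ALLOW_SELFSIGNED is the inverse of nodemailer's
// tls.rejectUnauthorized option.
const nodemailer = require('nodemailer');

const allowSelfSigned = process.env.EMAIL_ALLOW_SELFSIGNED === 'true';

const transporter = nodemailer.createTransport({
  host: process.env.EMAIL_HOST,
  port: Number(process.env.EMAIL_PORT ?? 25),
  tls: {
    // true => self-signed certificates are rejected (EMAIL_ALLOW_SELFSIGNED=false)
    rejectUnauthorized: !allowSelfSigned,
  },
});
```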
Danny Avila
f7761df52c 🗃️ feat: General File Support for OpenAI, Azure, Custom, Anthropic and Google (RAG) (#2143)
* refactor: re-purpose `resendImages` as `resendFiles`

* refactor: re-purpose `resendImages` as `resendFiles`

* feat: upload general files

* feat: embed file during upload

* feat: delete file embeddings on file deletion

* chore(fileConfig): add epub+zip type

* feat(encodeAndFormat): handle non-image files

* feat(createContextHandlers): build context prompt from file attachments and successful RAG

* fix: prevent non-temp files as well as embedded files from being deleted on new conversation

* fix: remove temp_file_id on usage, prevent non-temp files as well as embedded files from being deleted on new conversation

* fix: prevent non-temp files as well as embedded files from being deleted on new conversation

* feat(OpenAI/Anthropic/Google): basic RAG support

* fix: delete `resendFiles` only when true (Default)

* refactor(RAG): update endpoints and pass JWT

* fix(resendFiles): default values

* fix(context/processFile): query unique ids only

* feat: rag-api.yaml

* feat: improved file upload UX for longer uploads

* chore: await embed call and catch embedding errors

* refactor: store augmentedPrompt in Client

* refactor(processFileUpload): throw error if not assistant file upload

* fix(useFileHandling): handle markdown empty mimetype issue

* chore: necessary compose file changes
2024-03-19 20:54:30 -04:00
SailFlorve
af347cccde 🎨 style: HoverButton UI adjustment, change code font (#2017)
* style: HoverButton UI adjustment

* style: make Consolas the default code font

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
2024-03-19 13:55:41 -04:00
Danny Avila
86db0a1043 Revert "🔧 style(fix): Convo Title Fade Effect (#2117)" (#2139)
This reverts commit 1796821888.
2024-03-19 13:54:35 -04:00
Walber Cardoso
1796821888 🔧 style(fix): Convo Title Fade Effect (#2117)
* feat: Improve Google search plugin for assistants

* 🔧 fix(Nav SidePanel): Center buttons when collapsed

* 🔧(fix) Convo title fade effect

* 🔧(fix) Convo title fade effect / remove deletion

* 🔧(fix) Convo title fade effect / remove deletion .env.example

* 🔧(fix) Convo title fade effect

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
2024-03-19 13:43:24 -04:00
Ido Ophir
d8304ec1bb 📋 chore: add requirements.txt to documentation (#2122)
* chore: add requirements.txt to documentation, to ease maintenance

* docs: Update documentation_guidelines.md
2024-03-19 13:38:18 -04:00
Danny Avila
382b303963 🔍 feat: Filter MultiSelect and SelectDropDown (+variants) + CSS fixes for Scrollbar (#2138)
* Initial implementation of MultiSearch. Added implementation to MultiSelect and SelectDropDown and variants

* Update scrollbar styles to prevent breakages on Chrome

* Revert changes to vite.config.ts (redundant for now)

* chore(New Chat): organize imports

* style(scrollbar-transparent): use webkit as standard, expected behavior

* chore: useCallback for mouse enter/leave

* fix(Footer): resolve map key error

* chore: memoize Conversations

* style(MultiSearch): improve multisearch styling

* style: dark mode search input

* fix: react warnings due to unrecognized HTML props

* chore: debounce OpenAI settings inputs

* fix(useDebouncedInput): only use event value as newValue if not object

---------

Co-authored-by: Flynn <gpg@flyn.ca>
2024-03-19 13:35:10 -04:00
Danny Avila
f51ac74e12 🪰 fix: Azure Parsing and Assistants Payload (#2133)
* fix(azure): fix regex to prevent edge cases

* fix(assistants): pass relevant endpoint options to avoid sending them to API
2024-03-18 19:48:42 -04:00
Danny Avila
7cddd943d0 🔧 feat(actions): Allow Multiple Actions from Same Domain per Assistant (#2120) 2024-03-16 19:40:51 -04:00
Danny Avila
89f6b35e6c 🔧 fix: Remove Unique Index from Actions Model and Initialize Empty Actions for Deletion (#2118) 2024-03-16 18:53:43 -04:00
Danny Avila
a8cdd3460c 🔧 feat: Share Assistant Actions between Users (#2116)
* fix: remove unique field from assistant_id, which can be shared between different users

* refactor: remove unique user fields from actions/assistant queries

* feat: only allow user who saved action to delete it

* refactor: allow deletions for anyone with builder access

* refactor: update user.id when updating assistants/actions records, instead of searching with it

* fix: stringify response data in case it's an object

* fix: correctly handle path input

* fix(decryptV2): handle edge case where value is already decrypted (illustrated after this entry)
2024-03-16 16:49:11 -04:00
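One plausible way to handle that edge case is to attempt decryption and fall back to the raw value when it was never encrypted. This is a hedged sketch, not the actual `decryptV2` change; real code may check the failure mode more precisely.

```js
// Hypothetical sketch: tolerate values that are already plaintext.
async function decryptIfNeeded(value) {
  try {
    return await decryptV2(value); // assumed existing helper
  } catch {
    // Decryption failed, most likely because the value was never
    // encrypted; treat it as already decrypted.
    return value;
  }
}
```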
Marco Beretta
2f90c8764a 🖊️ fix(MessageContent): Error Message typo (#2112) 2024-03-16 13:05:56 -04:00
Fuegovic
39042f8761 🎨 style: Privacy Policy & Terms of Service (#2111) 2024-03-16 13:05:18 -04:00
Danny Avila
a9d2d3fe40 🪙 feat: Assistants Token Balance & other improvements (#2114)
* chore: add assistants to supportsBalanceCheck

* feat(Transaction): getTransactions and refactor export of model

* refactor: use enum: ViolationTypes.TOKEN_BALANCE

* feat(assistants): check balance

* refactor(assistants): only add promptBuffer if new convo (for title), and remove endpoint definition

* refactor(assistants): Count tokens up to the current context window

* fix(Switcher): make Select list explicitly controlled

* feat(assistants): use assistant's default model when no model is specified instead of the last selected assistant, prevent assistant_id from being recorded in non-assistant endpoints

* chore(assistants/chat): import order

* chore: bump librechat-data-provider due to changes
2024-03-15 19:48:42 -04:00
SailFlorve
f848d752e0 🌍 : Update Chinese Translations (#2098) 2024-03-15 16:12:02 -04:00
Fuegovic
8881346889 📑 docs: update .env.example (#2109) 2024-03-15 16:11:31 -04:00
Danny Avila
f769077ab4 🤖 fix(assistants): Default Capabilities and Retrieval Models (#2102) 2024-03-14 20:42:56 -04:00
Danny Avila
5cd5c3bef8 🅰️ feat: Azure OpenAI Assistants API Support (#1992)
* chore: rename dir from `assistant` to plural

* feat: `assistants` field for azure config, spread options in AppService

* refactor: rename constructAzureURL param for azure as `azureOptions`

* chore: bump openai and bun

* chore(loadDefaultModels): change naming of assistant -> assistants

* feat: load azure settings with correct baseURL for assistants' initializeClient

* refactor: add `assistants` flags to groups and model configs, add mapGroupToAzureConfig

* feat(loadConfigEndpoints): initialize assistants endpoint if azure flag `assistants` is enabled

* feat(AppService): determine assistant models on startup, throw Error if none

* refactor(useDeleteAssistantMutation): send model along with assistant id for delete mutations

* feat: support listing and deleting assistants with azure

* feat: add model query to assistant avatar upload

* feat: add azure support for retrieveRun method

* refactor: update OpenAIClient initialization

* chore: update README

* fix(ci): tests passing

* refactor(uploadOpenAIFile): improve logging and use more efficient REST API method

* refactor(useFileHandling): add model to metadata to target Azure region compatible with current model

* chore(files): add azure naming pattern for valid file id recognition

* fix(assistants): initialize openai with first available assistant model if none provided

* refactor(uploadOpenAIFile): add content type for azure, initialize formdata before azure options

* refactor(sleep): move sleep function out of Runs and into `~/server/utils`

* fix(azureOpenAI/assistants): make sure to only overwrite models with assistant models if `assistants` flag is enabled

* refactor(uploadOpenAIFile): revert to old method

* chore(uploadOpenAIFile): use enum for file purpose

* docs: azureOpenAI update guide with more info, examples

* feat: enable/disable assistant capabilities and specify retrieval models

* refactor: optional chain conditional statement in loadConfigModels.js

* docs: add assistants examples

* chore: update librechat.example.yaml

* docs(azure): update note of file upload behavior in Azure OpenAI Assistants

* chore: update docs and add descriptive message about assistant errors

* fix: prevent message submission with invalid assistant or if files loading

* style: update Landing icon & text when assistant is not selected

* chore: bump librechat-data-provider to 0.4.8

* fix(assistants/azure): assign req.body.model for proper azure init to abort runs
2024-03-14 17:21:42 -04:00
Flynn
1b243c6f8c 📜 feat: Customize Privacy Policy & Terms of Service (#2091) 2024-03-14 16:43:18 -04:00
Alexei Smirnov
d4190c9320 🌍 : Update Russian Translation (#2061)
* feat(chore): add missing translations in Ru.tsx

* feat(chore): add missing translation for My Files menu and headers

* change com_ui_my_files to com_ui_nav_files

* move useLocalize above utils

* feat(chore): add missing translation for My Files menu and headers
2024-03-14 11:26:44 -04:00
MACHINSOFT
cba135d456 style: Auth Error and Preset Items Styling (#2069)
* Change the style of the error message.

* ui preset items

* fix style

* Change the color of the border and adjust the background of the selected input
2024-03-14 09:07:55 -04:00
Raí Santos
f27e7c720f 🔧 fix: Convo Corners & Updated Colors (#2046)
* 🔧 fix: Convo Corners & Updated Colors

* refactored code

* chore: JSON.parse with a try/catch block, removed useless useEffect and restored focus

* restored TypeScript

* import all back
2024-03-14 09:04:09 -04:00
Danny Avila
1b8c0f0bfd chore: Update AnthropicIcon.tsx 2024-03-13 19:00:22 -04:00
Vilmondes Queiroz
0f417aaec0 🧹 chore: remove unused import (#2072) 2024-03-11 18:27:29 -04:00
Danny Avila
d1c37e8bde 🧊 style: Adjust Endpoint Icons (#2070)
* 🧊 style: Adjust Endpoint Icons

* Update MessageParts.tsx
2024-03-11 13:40:31 -04:00
Danny Avila
0bd8c2ba00 🌑 style(AnthropicIcon): adjust for Dark Mode 2024-03-11 11:36:54 -04:00
Danny Avila
ebcca16b94 🌐 feat: librechat.yaml from URL (#2064)
* feat: librechat.yaml from URL

* doc update: librechat.yaml from URL

* update dotenv.md - typo

* Update loadCustomConfig.js

* ci: specs for loadCustomConfig

* fix(processFileURL): safe destructuring of saveURL result

---------

Co-authored-by: fuegovic <fueg@live.ca>
Co-authored-by: Fuegovic <32828263+fuegovic@users.noreply.github.com>
2024-03-11 10:52:54 -04:00
MACHINSOFT
f5a754c8be 🖌️ style: Minor UI Updates (#2011)
* UI Design update

* Add an error icon next to the avatar.

* fix

* Change the style of buttons

* fix: avatar
2024-03-11 10:31:32 -04:00
Walber Cardoso
2e77813952 🔧 style(SidePanel): Center buttons when collapsed (#2045)
* feat: Improve Google search plugin for assistants

* 🔧 fix(Nav SidePanel): Center buttons when collapsed
2024-03-11 09:24:38 -04:00
Danny Avila
f307488dd4 ✍️ refactor(Textarea): Optimize Text Input & Enhance UX (#2058)
* refactor(useDebouncedInput): make object as input arg and accept setter

* refactor(ChatForm/Textarea): consolidate textarea/form logic to one component, use react-hook-form, programmatically click send button instead of passing submitMessage, forwardRef and memoize SendButton

* refactor(Textarea): use Controller field value to avoid manual update of ref

* chore: remove forms provider

* chore: memoize AttachFile

* refactor(ChatForm/SendButton): only re-render SendButton when there is text input

* chore: make iconURL bigger

* chore: optimize Root/Nav

* refactor(SendButton): memoize disabled prop based on text

* chore: memoize Nav and ChatForm

* chore: remove textarea ref text on submission

* feat(EditMessage): Make Esc exit the edit mode and dismiss changes when editing a message

* style(MenuItem): Display the ☑️  icon only on the selected model
2024-03-11 09:18:10 -04:00
Fuegovic
f489aee518 📧 update email templates (#2057)
* 📧 chore: update email templates

* 📧 update password reset confirmation
2024-03-11 09:07:09 -04:00
Fuegovic
2f88c5cb8a ✏️ docs: Railway, Traefik, and Improvements (#2060)
* docs: documentation guidelines

* docs: deploy documentation update
2024-03-11 09:06:27 -04:00
Marco Beretta
6fcaeaafe2 🔧 fix(ThemeContext): Listen for Theme Changes (#2037)
* fix(ThemeContext): listen for changes

* fix(Dropdown): theme auto-update not working
2024-03-09 11:36:04 -05:00
Fuegovic
db870e55c3 🔖 chore: update groq models (#2031) 2024-03-09 08:32:08 -05:00
Fuegovic
5d0d02f5f7 🖊️chore: fix deployment guides (#2021) 2024-03-08 08:52:26 -05:00
Danny Avila
40e884b3ec 🖼️ fix: Clipboard Files & File Name Issues (#2015)
* fix: ensure image handling fetches image to base64 for multiple images

* fix: append file_ids when writing uploaded files

* feat: timestamp files uploaded from clipboard

* chore: add a different fileid+name separator
2024-03-07 12:27:42 -05:00
Danny Avila
18edd2660b 👥 fix(assistants): Improve Error handling (#2012)
* feat: make assistants endpoint appendable since message state is not managed by LibreChat

* fix(ask): search currentMessages for thread_id if it's not defined

* refactor(abortMiddleware): remove use of `overrideProps` and spread unknown fields instead

* chore: remove console.log in `abortConversation`

* refactor(assistants): improve error handling/cancellation flow
2024-03-07 10:50:01 -05:00
Walber Cardoso
d4fe8fc82d 🔍 feat: Add Google Search Tool for Assistants (#1994) 2024-03-07 10:49:48 -05:00
Ido Ophir
a5f4292d2d 🌊 docs: refactor DigitalOcean guide (#2006) 2024-03-07 08:12:39 -05:00
Fuegovic
fbdf1d17ea 💾 chore: Update .env.example (#2004)
* Update .env.example

Make assistants show in the UI by default

* Update dotenv.md
2024-03-07 08:11:32 -05:00
Ido Ophir
11bca134e7 📝 docs: additions to deployment guide (#2001)
* docs: add intro to deployment guide

* doc: update intro

* doc: Add NGINX deployment guide and update reverse proxy link

* doc: add reverse proxy pages and weight for the pages

* doc: Update NGINX configuration file

* doc: improve new doc

* doc: fix file names

* doc: fix reference names + improve the introduction with chatgpt :-)

* doc: update introduction guide headings
2024-03-07 08:10:44 -05:00
Danny Avila
ab66747e97 🔧 style: Improve UI and UX with Style Fixes and Code Refactors (#2002)
* refactor(useSSE): add useCallback to all event handlers

* chore: remove modelName in defaultAssistantFormValues

* fix(SidePanel): fix layout shift on chrome by removing sidenav scrollbar

* style(ChatForm): match ChatGPT textarea effect styling

* style: fix flickering of old background color on refresh
2024-03-06 17:49:53 -05:00
Marco Beretta
b2ab6fd19d 🖌️ style: update dialog position (#1999)
* style(ChatForm): update styling and fix style bug

* style(Dialog): reduced max height; style(Settings): fixed dialog position height

* style(Settings): fixed large-screen position
2024-03-06 17:03:23 -05:00
Fuegovic
ab263c7a50 📝 docs update: Anthropic models + Traversaal (#1995)
* 📝 docs update: Anthropic models + Traversaal

* 📝 docs update: Anthropic models
2024-03-06 16:52:42 -05:00
Marco Beretta
911babd3e0 🖌️ style: Update Light/Dark UI Themes (#1754)
* BIG UI UPDATE

* fix: search bar, dialog template, new chat icon, convo icon and delete/rename button

* moved some color config and a lot of files

* small text fixes and tailwind config refactor

* Update localization and UI styles

* Update styles and add user-select:none to Tooltip component

* Update mobile.css styles for navigation mask and background color

* Update component imports and styles

* Update DeleteButton imports and references

* Update UI components

* Update tooltip delay duration

* Fix styling and update text in various components

* fixed assistant style

* minor style fixes

* revert: removed CreationHeader & CreationPanel

* style: match new styling for SidePanel

* style: match bg-gray-800 to ChatGPT (#212121)

* style: remove slate for gray where applicable to match new light theme

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
2024-03-06 12:05:43 -05:00
Danny Avila
2733c5ebe7 🔎 fix(Traversaal): Recognize authField during Tool Initialization 2024-03-06 10:59:00 -05:00
Danny Avila
959d6153f6 🔎 feat: Traversaal Search Tool (#1991)
* wip: Traversaal Search Tool

* fix(traversaal): properly handle tool error, show error to LLM, log

* feat(traversaal): finish implementation of structured tool

* chore: change traversaal order
2024-03-06 10:25:38 -05:00
Danny Avila
14dd3dd240 🖋️ fix(OpenAIClient): remove typo 2024-03-06 09:19:52 -05:00
Danny Avila
8263ddda3f 🤖 feat(Anthropic): Claude 3 & Vision Support (#1984)
* chore: bump anthropic SDK

* chore: update anthropic config settings (fileSupport, default models)

* feat: anthropic multi modal formatting

* refactor: update vision models and use endpoint specific max long side resizing

* feat(anthropic): multimodal messages, retry logic, and messages payload

* chore: add more safety to trimming content due to whitespace error for assistant messages

* feat(anthropic): token accounting and resending multiple images in progress

* chore: bump data-provider

* feat(anthropic): resendImages feature

* chore: optimize Edit/Ask controllers, switch model back to req model

* fix: false positive of invalid model

* refactor(validateVisionModel): use object as arg, pass in additional/available models

* refactor(validateModel): use helper function, `getModelsConfig`

* feat: add modelsConfig to endpointOption so it gets passed to all clients, use for properly validating vision models

* refactor: initialize default vision model and make sure it's available before assigning it

* refactor(useSSE): avoid resetting model if user selected a new model between request and response

* feat: show rate in transaction logging

* fix: return tokenCountMap regardless of payload shape
2024-03-06 00:04:52 -05:00
Danny Avila
b023c5683d 🛠️ refactor(loadConfigModels): make apiKey and baseURL pairings more versatile (#1985) 2024-03-05 15:42:19 -05:00
Fuegovic
a33db54b81 🔎 update meilisearch to v1.6 / 0.37.0 (#1981)
* 🔎 update meilisearch to v1.6 / 0.37.0

* 🔎 update meilisearch to v1.6 / 0.37.0
2024-03-05 14:36:01 -05:00
Danny Avila
7a6a41a72e 🧪 fix(ci): update failing initializeClient tests with new expected values (#1982)
* fix(ci): update failing tests with new expected values from `getUserKey`

* refactor: safer optional chaining, and ensure apiKey is defined
2024-03-05 14:33:45 -05:00
Fuegovic
2ea6e8c18a 🥷🪦 docs: remove ninja and chatgptBrowser (#1973) 2024-03-04 19:49:34 -05:00
Ido Ophir
7c85b35af0 🌍 : Add Hebrew Translation (#1953)
* feat: add hebrew

* fix: review issues

* fix language options
2024-03-04 17:16:49 -05:00
Fuegovic
eccf7bbbde 🦙 doc: add Ollama to index and update icon (#1967) 2024-03-04 17:16:33 -05:00
Danny Avila
8bef084bfc 🧩 fix(Plugins): Keep User agentModel and Model Validation (#1972)
* fix: do not override model

* temp fix for secondary model validation
2024-03-04 17:07:30 -05:00
Danny Avila
62834e18fb 🪙 fix(config): use new field for balance 2024-03-04 16:37:06 -05:00
Marco Beretta
2da0a7661d 🔧 fix(EditMessage): duplicate text when pasting (#1970)
* fix(EditMessage): duplicate text when pasting on chromium

* add back paste data handling, prevent default behavior

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
2024-03-04 16:27:34 -05:00
Marco Beretta
7d633f4018 🔧 fix(useTextarea): duplicate text when pasting on chromium (#1951) 2024-03-02 15:53:13 -05:00
bsu3338
78f52859c4 📚 docs: Separate LiteLLM and Ollama Documentation (#1948)
* Separate LiteLLM and Ollama Documentation

* Clarify Ollama Setup

* Fix litellm config
2024-03-02 12:42:02 -05:00
Danny Avila
b2ef75e009 🖥️ feat: Match STDOUT Logs with Debug File Logs (#1944)
* chore: improve token balance logging post-request

* feat: match stdout logging with file debug logging when using DEBUG_CONSOLE
2024-03-01 13:42:04 -05:00
Danny Avila
ef86b25dae 👤 feat: Show Default Icon if No Avatar or Username provided (#1943) 2024-03-01 13:08:27 -05:00
Danny Avila
c52ea9490b 📝 feat: Improved Textarea Functionality (#1942)
* feat: paste plain text from apps with rich paste data, improved edit message textarea, improved height resizing for long text

* feat(EditMessage): autofocus

* chore: retain user text color when entering edit mode
2024-03-01 12:46:15 -05:00
Fuegovic
de0cee3f56 🔎 docs: update meilisearch instructions (#1930)
* 🔎 docs: update meilisearch in mac_install.md

Update the Meilisearch .env variables in `mac_install.md`

* 🔎🐧
2024-03-01 12:32:32 -05:00
Danny Avila
1caa31b035 🐳chore(Dockerfile): add additional steps to prevent arm64 build failure 2024-02-29 10:04:36 -05:00
Danny Avila
ed7d7c2fda 🐳 chore(Dockerfile): replace npm ci with npm install for OS specific builds 2024-02-29 09:46:33 -05:00
Danny Avila
93803323cf 🐳 experimental: Dev Image Workflow & Remove Unused Code (#1928)
* chore: remove unused code in progressCallback, as well as handle reply.trim(), post `getCompletion`

* chore(Dockerfile): remove curl installation

* experimental: dev image parallelized with matrix strategy and building for amd64/arm64 support

* make platforms explicit
2024-02-29 09:24:55 -05:00
Danny Avila
388dc1789b 🛠️ fix: RunManager, AssistantService and useContentHandler Issues (#1920)
* fix(useContentHandler): retain undefined parts and handle them within `ContentParts` rendering

* fix(AssistantService/in_progress): skip empty messages

* refactor(RunManager): create highly specific `seenSteps` Set keys for RunSteps with use of `getDetailsSignature` and `getToolCallSignature`, to ensure changes from polling are always captured
2024-02-28 15:15:45 -05:00
Fuegovic
057fcf6274 🌍 feat: Extend regex to support international usernames (#1918)
* 🌍 Extend regex to support international usernames (see the sketch after this entry)

* update validators.spec.js
2024-02-28 14:27:57 -05:00
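Supporting international usernames generally means moving from an ASCII character class to Unicode property escapes. A hedged illustration of the idea; the project's actual validator pattern may differ.

```js
// Sketch: Unicode-aware username validation with \p{...} escapes,
// which require the `u` flag. Allows letters, combining marks,
// digits, spaces, and a few common separators.
const usernameRegex = /^[\p{L}\p{M}\p{N} _'.-]{2,80}$/u; // length bounds assumed

console.log(usernameRegex.test('José Müller')); // true
console.log(usernameRegex.test('山田太郎'));      // true
console.log(usernameRegex.test('user<123>'));   // false
```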
Danny Avila
2f92b54787 🔗 feat: User Provided Base URL for OpenAI endpoints (#1919)
* chore: bump browserslist-db@latest

* refactor(EndpointService): simplify with `generateConfig`, utilize optional baseURL for OpenAI-based endpoints, use `isUserProvided` helper fn wherever needed

* refactor(custom/initializeClient): use standardized naming for common variables

* feat: user provided baseURL for openAI-based endpoints

* refactor(custom/initializeClient): re-order operations

* fix: knownendpoints enum definition and add FetchTokenConfig, bump data-provider

* refactor(custom): use tokenKey dependent on userProvided conditions for caching and fetching endpointTokenConfig, anticipate token rates from custom config

* refactor(custom): assure endpointTokenConfig is only accessed from cache if qualifies for fetching

* fix(ci): update tests for initializeClient based on userProvideURL changes

* fix(EndpointService): correct baseURL env var for assistants: `ASSISTANTS_BASE_URL`

* fix: unnecessary run cancellation on res.close() when response.run is completed

* feat(assistants): user provided URL option

* ci: update tests and add test for `assistants` endpoint

* chore: leaner condition for request closing

* chore: more descriptive error message to provide keys again
2024-02-28 14:27:19 -05:00
Fuegovic
53ae2d7bfb 🤖feat: add multiple known endpoints (#1917)
* feat: add known endpoints

* docs: add known endpoints

* update ai_endpoints.md

remove the groq icon from the example

* Update ai_endpoints.md

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
2024-02-28 08:46:21 -05:00
Marco Beretta
156abe2fca 🔗 feat: NavLinks customization for Help & Faq URL (#1872)
* help and faq

* fix: using only one var

* revert(types.ts): showHelpAndFaq

* Update dotenv.md

* Update dotenv.md
2024-02-27 17:59:56 -05:00
Danny Avila
c37d5568bf 🍞 fix: Minor fixes and improved Bun support (#1916)
* fix(bun): fix bun compatibility to allow gzip header: https://github.com/oven-sh/bun/issues/267#issuecomment-1854460357

* chore: update custom config examples

* fix(OpenAIClient.chatCompletion): remove redundant call of stream.controller.abort() as `break` aborts the request and prevents abort errors when not called redundantly

* chore: bump bun.lockb

* fix: remove result-thinking class when message is no longer streaming

* fix(bun): improve Bun support by forcing use of old method in bun env, also update old methods with new customizable params

* fix(ci): pass tests
2024-02-27 17:51:16 -05:00
Danny Avila
5d887492ea 🤖 docs: Add Groq and other Compatible AI Endpoints (#1915)
* chore: bump bun dependencies

* feat: make `groq` a known endpoint

* docs: compatible ai endpoints

* Update ai_endpoints.md

* Update ai_endpoints.md
2024-02-27 13:42:10 -05:00
Danny Avila
04eeb59d47 🛠️ chore: Abort AI Requests on Close & Remove Verbose Logs for Plugins (#1914)
* chore: remove verbose logging of ChatOpenAI

* feat: abort AI requests on request close (sketched after this entry)
2024-02-27 10:21:06 -05:00
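The usual Node pattern for this is tying an `AbortController` to the response's `close` event so the upstream AI request is cancelled when the client disconnects. A hedged sketch; LibreChat's actual handling (including the completed-run check noted elsewhere in this log) is more involved.

```js
// Sketch: abort the upstream AI call if the client disconnects
// before the response has finished.
function attachAbortOnClose(res) {
  const controller = new AbortController();

  res.on('close', () => {
    // Only abort if the response didn't end normally.
    if (!res.writableEnded) {
      controller.abort();
    }
  });

  return controller.signal; // pass to the SDK/fetch call as { signal }
}
```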
Danny Avila
08d4b3cc8a 🅰️ feat: Azure AI Studio, Models as a Service Support (#1902)
* feat(data-provider): add Azure serverless inference handling through librechat.yaml

* feat(azureOpenAI): serverless inference handling in api

* docs: update docs with new azureOpenAI endpoint config fields and serverless inference endpoint setup

* chore: remove unnecessary checks for apiKey as schema would not allow apiKey to be undefined

* ci(azureOpenAI): update tests for serverless configurations
2024-02-26 19:10:29 -05:00
Raí Santos
6d6b3c9c1d 🌍 : Update Portuguese Translations (#1867)
* 🌍 : Update Portuguese Translations

* 🌍 : Fix Portuguese Translations

* fix(Br): lint errors

---------

Co-authored-by: Berry-13 <81851188+Berry-13@users.noreply.github.com>
2024-02-26 14:37:08 -05:00
Danny Avila
49744d1af9 🔥chore: bump firebase dependency (#1900) 2024-02-26 14:36:48 -05:00
Marco Beretta
b4dc8cc2ad 🖌️ style: auth dark theme (#1862)
* Remove minLength validation and update login link style

* Add theme selector component and update login form styles

* Update styling in Login and LoginForm components

* Update ResetPassword component styles and text color

* Refactor login component and add theme selector

* Add ThemeSelector component to Registration, RequestPasswordReset, and ResetPassword pages

* chore(Login.tsx): remove unused `useCallback`

* chore(Login.tsx) import order

* Update ResetPassword.tsx import order

* Update RequestPasswordReset.tsx import order

* Update Registration.tsx import order

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
2024-02-26 14:21:17 -05:00
Danny Avila
097a978e5b 🅰️ feat: Azure Config to Allow Different Deployments per Model (#1863)
* wip: first pass for azure endpoint schema

* refactor: azure config to return groupMap and modelConfigMap

* wip: naming and schema changes

* refactor(errorsToString): move to data-provider

* feat: rename to azureGroups, add additional tests, tests all expected outcomes, return errors

* feat(AppService): load Azure groups

* refactor(azure): use imported types, write `mapModelToAzureConfig`

* refactor: move `extractEnvVariable` to data-provider

* refactor(validateAzureGroups): throw on duplicate groups or models; feat(mapModelToAzureConfig): throw if env vars not present, add tests

* refactor(AppService): ensure each model is properly configured on startup

* refactor: deprecate azureOpenAI environment variables in favor of librechat.yaml config

* feat: use helper functions to handle and order enabled/default endpoints; initialize azureOpenAI from config file

* refactor: redefine types as well as load azureOpenAI models from config file

* chore(ci): fix test description naming

* feat(azureOpenAI): use validated model grouping for request authentication

* chore: bump data-provider following rebase

* chore: bump config file version noting significant changes

* feat: add title options and switch azure configs for titling and vision requests

* feat: enable azure plugins from config file

* fix(ci): pass tests

* chore(.env.example): mark `PLUGINS_USE_AZURE` as deprecated

* fix(fetchModels): early return if apiKey not passed

* chore: fix azure config typing

* refactor(mapModelToAzureConfig): return baseURL and headers as well as azureOptions

* feat(createLLM): use `azureOpenAIBasePath`

* feat(parsers): resolveHeaders

* refactor(extractBaseURL): handle invalid input

* feat(OpenAIClient): handle headers and baseURL for azureConfig

* fix(ci): pass `OpenAIClient` tests

* chore: extract env var for azureOpenAI group config, baseURL

* docs: azureOpenAI config setup docs

* feat: safe check of potential conflicting env vars that map to unique placeholders

* fix: reset apiKey when model switches from originally requested model (vision or title)

* chore: linting

* docs: CONFIG_PATH notes in custom_config.md
2024-02-26 14:12:25 -05:00
Andreas
7a55132e42 🔧 feat: optional librechat.yaml path via environment variable (#1858)
Co-authored-by: afel <andreas.feldl@netlight.com>
2024-02-26 13:59:19 -05:00
Arno Angerer
c1a4733d50 📒 docs: Add newline for list to be correctly rendered in UI (#1873)
Currently, the bullet list on the documentation page is not rendered correctly. (See the first paragraph of this docs page: https://docs.librechat.ai/install/configuration/litellm.html)
2024-02-23 15:29:36 -05:00
Danny Avila
f431c8fb00 🔀 fix: Correct Expected Behavior for Modular Chat Feature (#1871) 2024-02-23 12:14:58 -05:00
Fuegovic
5445d55af2 🐋 docs: update breaking_changes.md (#1864)
add note about the use of the pre-built image in docker-compose.yml
2024-02-23 11:51:17 -05:00
Danny Avila
6a25dd38a4 🗨️ fix: Prevent Resetting Title to 'New Chat' on Follow-Up Message (#1870)
* fix: prevent resetting title to 'New Chat' on follow-up message

* chore(useSSE): remove empty line
2024-02-23 10:20:46 -05:00
Fuegovic
ece5d9f588 ✏️docs: add tavily to env.example and dotenv.md (#1866)
* update .env.example

add "TAVILY_API_KEY=" to .env.example

* update dotenv.md

add Tavily to dotenv.md
2024-02-23 10:08:49 -05:00
Danny Avila
5f6d1f3db0 🎨 feat: Create Avatars of Initials Locally (#1869) 2024-02-23 09:23:29 -05:00
Fuegovic
4012dea4ab 🐋 Feat: docker pre-built image by default (#1860)
* 🐋 Feat: docker pre-built image by default

* 🐋 Feat: docker LibreChat ports from .env
2024-02-22 13:20:27 -05:00
Danny Avila
128446601a 🐛 fix: Preserve Default Model in Message Requests (#1857)
* fix: do not remove default model from message request

* chore: bump data-provider
2024-02-21 13:29:21 -05:00
Danny Avila
dd8038b375 🛠️ refactor: Model Loading and Custom Endpoint Error Handling (#1849)
* fix: handle non-assistant role ChatCompletionMessage error

* refactor(ModelController): decouple res.send from loading/caching models

* fix(custom/initializeClient): only fetch custom endpoint models if models.fetch is true

* refactor(validateModel): load models if modelsConfig is not yet cached

* docs: update on file upload rate limiting
2024-02-20 12:57:58 -05:00
Danny Avila
542494fad6 📋 feat: Accumulate Text Parts to Clipboard for Assistant Outputs (#1847) 2024-02-20 09:33:31 -05:00
Danny Avila
64e81392f2 ⬤ style: Uniform Display of Result-Streaming Cursor (#1842) 2024-02-19 22:55:58 -05:00
Danny Avila
a8a19c6caa 🛡️ feat: Model Validation Middleware (#1841)
* refactor: add ViolationTypes enum and add new violation for illegal model requests

* feat: validateModel middleware to protect the backend against illicit requests for unlisted models (sketched after this entry)
2024-02-19 22:47:39 -05:00
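In Express terms, such a guard sits ahead of the chat handlers and rejects any request whose model is not in the cached models list. A hedged sketch of the pattern, reusing the `getModelsConfig` helper named elsewhere in this log; the violation scoring and response shape are assumptions.

```js
// Hypothetical sketch of a model-validation middleware.
async function validateModel(req, res, next) {
  const { model, endpoint } = req.body;
  const modelsConfig = await getModelsConfig(req); // assumed cached lookup

  if (!model || !modelsConfig?.[endpoint]?.includes(model)) {
    // Unlisted model: treat as a violation and reject the request.
    return res.status(403).json({ error: 'Illegal model request' });
  }

  return next();
}
```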
Danny Avila
d8038e3b19 📤 refactor: Utilize intermediateReply when message.content is Empty 2024-02-19 11:03:42 -05:00
Danny Avila
ee97179edb 📝 chore: Update README.md 2024-02-19 09:45:59 -05:00
Danny Avila
63a5039fae 🔗 chore: Add Stable Discord and Homepage Links (#1835) 2024-02-19 09:42:57 -05:00
Fuegovic
7442955a1d 📝 docs: add env changes to breaking_changes.md and minor fixes (#1812)
* 📝 docs: add env changes to breaking_changes.md

* 📝 docs: replace example in docker_override.md

* 📝 docs: fix images in zeabur.md
2024-02-19 09:41:07 -05:00
Danny Avila
5291d18f38 🔀 fix: Endpoint Type Mismatch when Switching Conversations (#1834)
* refactor(useUpdateUserKeysMutation): only invalidate the endpoint whose key is being updated by user

* fix(assistants): await `getUserKeyExpiry` call

* chore: fix spinner loading color

* refactor(initializeClient): make known which endpoint api Key is missing

* fix: prevent an `endpointType` mismatch by making it impossible to assign when the `endpointsConfig` doesn't have a `type` defined, also prefer `getQueryData` call to useQuery in useChatHelpers
2024-02-19 01:31:38 -05:00
Danny Avila
d1eb7fcfc7 Update main-image-workflow.yml 2024-02-16 16:05:18 -05:00
Danny Avila
ce1cdea3de Update main-image-workflow.yml 2024-02-16 15:55:12 -05:00
Danny Avila
0da30b9481 Update main-image-workflow.yml 2024-02-16 15:46:09 -05:00
Danny Avila
b7aebf6c51 Update main-image-workflow.yml 2024-02-16 15:43:31 -05:00
Danny Avila
29ee4423a6 🐋 chore: Add Docker Compose Build Latest Main Image workflow (#1819) 2024-02-16 15:37:32 -05:00
474 changed files with 18776 additions and 8224 deletions


@@ -13,9 +13,6 @@
# Server Configuration #
#==================================================#
APP_TITLE=LibreChat
# CUSTOM_FOOTER="My custom footer"
HOST=localhost
PORT=3080
@@ -26,6 +23,13 @@ DOMAIN_SERVER=http://localhost:3080
NO_INDEX=true
#===============#
# JSON Logging #
#===============#
# Use when process console logs in cloud deployment like GCP/AWS
CONSOLE_JSON=false
#===============#
# Debug Logging #
#===============#
@@ -40,38 +44,62 @@ DEBUG_CONSOLE=false
# UID=1000
# GID=1000
#===============#
# Configuration #
#===============#
# Use an absolute path, a relative path, or a URL
# CONFIG_PATH="/alternative/path/to/librechat.yaml"
#===================================================#
# Endpoints #
#===================================================#
# ENDPOINTS=openAI,assistants,azureOpenAI,bingAI,chatGPTBrowser,google,gptPlugins,anthropic
# ENDPOINTS=openAI,assistants,azureOpenAI,bingAI,google,gptPlugins,anthropic
PROXY=
#===================================#
# Known Endpoints - librechat.yaml #
#===================================#
# https://docs.librechat.ai/install/configuration/ai_endpoints.html
# GROQ_API_KEY=
# SHUTTLEAI_KEY=
# OPENROUTER_KEY=
# MISTRAL_API_KEY=
# ANYSCALE_API_KEY=
# FIREWORKS_API_KEY=
# PERPLEXITY_API_KEY=
# TOGETHERAI_API_KEY=
#============#
# Anthropic #
#============#
ANTHROPIC_API_KEY=user_provided
ANTHROPIC_MODELS=claude-1,claude-instant-1,claude-2
# ANTHROPIC_MODELS=claude-3-opus-20240229,claude-3-sonnet-20240229,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
# ANTHROPIC_REVERSE_PROXY=
#============#
# Azure #
#============#
# AZURE_API_KEY=
AZURE_OPENAI_MODELS=gpt-3.5-turbo,gpt-4
# AZURE_OPENAI_DEFAULT_MODEL=gpt-3.5-turbo
# PLUGINS_USE_AZURE="true"
AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE
# Note: these variables are DEPRECATED
# Use the `librechat.yaml` configuration for `azureOpenAI` instead
# You may also continue to use them if you opt out of using the `librechat.yaml` configuration
# AZURE_OPENAI_API_INSTANCE_NAME=
# AZURE_OPENAI_API_DEPLOYMENT_NAME=
# AZURE_OPENAI_API_VERSION=
# AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME=
# AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME=
# AZURE_OPENAI_DEFAULT_MODEL=gpt-3.5-turbo # Deprecated
# AZURE_OPENAI_MODELS=gpt-3.5-turbo,gpt-4 # Deprecated
# AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE # Deprecated
# AZURE_API_KEY= # Deprecated
# AZURE_OPENAI_API_INSTANCE_NAME= # Deprecated
# AZURE_OPENAI_API_DEPLOYMENT_NAME= # Deprecated
# AZURE_OPENAI_API_VERSION= # Deprecated
# AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME= # Deprecated
# AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME= # Deprecated
# PLUGINS_USE_AZURE="true" # Deprecated
#============#
# BingAI #
@@ -80,14 +108,6 @@ AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE
BINGAI_TOKEN=user_provided
# BINGAI_HOST=https://cn.bing.com
#============#
# ChatGPT #
#============#
CHATGPT_TOKEN=
CHATGPT_MODELS=text-davinci-002-render-sha
# CHATGPT_REVERSE_PROXY=
#============#
# Google #
#============#
@@ -115,13 +135,13 @@ DEBUG_OPENAI=false
# OPENAI_REVERSE_PROXY=
# OPENAI_ORGANIZATION=
# OPENAI_ORGANIZATION=
#====================#
# Assistants API #
#====================#
# ASSISTANTS_API_KEY=
ASSISTANTS_API_KEY=user_provided
# ASSISTANTS_BASE_URL=
# ASSISTANTS_MODELS=gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview
@@ -183,6 +203,14 @@ SERPAPI_API_KEY=
#-----------------
SD_WEBUI_URL=http://host.docker.internal:7860
# Tavily
#-----------------
TAVILY_API_KEY=
# Traversaal
#-----------------
TRAVERSAAL_API_KEY=
# WolframAlpha
#-----------------
WOLFRAM_APP_ID=
@@ -238,6 +266,8 @@ LIMIT_MESSAGE_USER=false
MESSAGE_USER_MAX=40
MESSAGE_USER_WINDOW=1
ILLEGAL_MODEL_REQ_SCORE=5
#========================#
# Balance #
#========================#
@@ -294,15 +324,15 @@ OPENID_IMAGE_URL=
# Email Password Reset #
#========================#
EMAIL_SERVICE=
EMAIL_HOST=
EMAIL_PORT=25
EMAIL_ENCRYPTION=
EMAIL_ENCRYPTION_HOSTNAME=
EMAIL_ALLOW_SELFSIGNED=
EMAIL_USERNAME=
EMAIL_PASSWORD=
EMAIL_FROM_NAME=
EMAIL_SERVICE=
EMAIL_HOST=
EMAIL_PORT=25
EMAIL_ENCRYPTION=
EMAIL_ENCRYPTION_HOSTNAME=
EMAIL_ALLOW_SELFSIGNED=
EMAIL_USERNAME=
EMAIL_PASSWORD=
EMAIL_FROM_NAME=
EMAIL_FROM=noreply@librechat.ai
#========================#
@@ -316,6 +346,16 @@ FIREBASE_STORAGE_BUCKET=
FIREBASE_MESSAGING_SENDER_ID=
FIREBASE_APP_ID=
#===================================================#
# UI #
#===================================================#
APP_TITLE=LibreChat
# CUSTOM_FOOTER="My custom footer"
HELP_AND_FAQ_URL=https://librechat.ai
# SHOW_BIRTHDAY_ICON=true
#==================================================#
# Others #
#==================================================#
@@ -323,15 +363,8 @@ FIREBASE_APP_ID=
# NODE_ENV=
# If using Redis, you should flush the cache after changing any LibreChat settings
# REDIS_URI=
# USE_REDIS=
# Give the AI Icon a Birthday Hat :)
# Will show automatically on February 11th (LibreChat's birthday)
# Set this to false to disable the birthday hat
# Set to true to enable all the time.
# SHOW_BIRTHDAY_ICON=true
# E2E_USER_EMAIL=
# E2E_USER_PASSWORD=
# E2E_USER_PASSWORD=


@@ -19,6 +19,7 @@ module.exports = {
'e2e/playwright-report/**/*',
'packages/data-provider/types/**/*',
'packages/data-provider/dist/**/*',
'packages/data-provider/test_bundle/**/*',
'data-node/**/*',
'meili_data/**/*',
'node_modules/**/*',


@@ -60,7 +60,7 @@ representative at an online or offline event.
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement here on GitHub or
on the official [Discord Server](https://discord.gg/uDyZ5Tzhct).
on the official [Discord Server](https://discord.librechat.ai).
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the


@@ -8,7 +8,7 @@ If the feature you would like to contribute has not already received prior appro
Please note that a pull request involving a feature that has not been reviewed and approved by the project maintainers may be rejected. We appreciate your understanding and cooperation.
If you would like to discuss the changes you wish to make, join our [Discord community](https://discord.gg/uDyZ5Tzhct), where you can engage with other contributors and seek guidance from the community.
If you would like to discuss the changes you wish to make, join our [Discord community](https://discord.librechat.ai), where you can engage with other contributors and seek guidance from the community.
## Our Standards


@@ -50,7 +50,7 @@ body:
id: terms
attributes:
label: Code of Conduct
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/danny-avila/LibreChat/blob/main/CODE_OF_CONDUCT.md)
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/danny-avila/LibreChat/blob/main/.github/CODE_OF_CONDUCT.md)
options:
- label: I agree to follow this project's Code of Conduct
required: true

.github/SECURITY.md

@@ -12,7 +12,7 @@ When reporting a security vulnerability, you have the following options to reach
- **Option 2: GitHub Issues**: You can initiate first contact via GitHub Issues. However, please note that initial contact through GitHub Issues should not include any sensitive details.
- **Option 3: Discord Server**: You can join our [Discord community](https://discord.gg/5rbRxn4uME) and initiate first contact in the `#issues` channel. However, please ensure that initial contact through Discord does not include any sensitive details.
- **Option 3: Discord Server**: You can join our [Discord community](https://discord.librechat.ai) and initiate first contact in the `#issues` channel. However, please ensure that initial contact through Discord does not include any sensitive details.
_After the initial contact, we will establish a private communication channel for further discussion._
@@ -39,11 +39,11 @@ Please note that as a security-conscious community, we may not always disclose d
This security policy applies to the following GitHub repository:
- Repository: [LibreChat](https://github.com/danny-avila/LibreChat)
- Repository: [LibreChat](https://github.librechat.ai)
## Contact
If you have any questions or concerns regarding the security of our project, please join our [Discord community](https://discord.gg/NGaa9RPCft) and report them in the appropriate channel. You can also reach out to us by [opening an issue](https://github.com/danny-avila/LibreChat/issues/new) on GitHub. Please note that the response time may vary depending on the nature and severity of the inquiry.
If you have any questions or concerns regarding the security of our project, please join our [Discord community](https://discord.librechat.ai) and report them in the appropriate channel. You can also reach out to us by [opening an issue](https://github.com/danny-avila/LibreChat/issues/new) on GitHub. Please note that the response time may vary depending on the nature and severity of the inquiry.
## Acknowledgments

View File

@@ -15,8 +15,9 @@ Please delete any irrelevant options.
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update
- [ ] Documentation update
- [ ] Translation update
- [ ] Documentation update
## Testing
@@ -26,6 +27,8 @@ Please describe your test process and include instructions so that we can reprod
## Checklist
Please delete any irrelevant options.
- [ ] My code adheres to this project's style guidelines
- [ ] I have performed a self-review of my own code
- [ ] I have commented in any complex areas of my code
@@ -34,3 +37,4 @@ Please describe your test process and include instructions so that we can reprod
- [ ] I have written tests demonstrating that my changes are effective or that my feature works
- [ ] Local unit tests pass with my changes
- [ ] Any changes dependent on mine have been merged and published in downstream modules.
- [ ] New documents have been locally validated with mkdocs

View File

@@ -35,6 +35,21 @@ jobs:
- name: Install Data Provider
run: npm run build:data-provider
- name: Create empty auth.json file
run: |
mkdir -p api/data
echo '{}' > api/data/auth.json
- name: Check for Circular dependency in rollup
working-directory: ./packages/data-provider
run: |
output=$(npm run rollup:api)
echo "$output"
if echo "$output" | grep -q "Circular dependency"; then
echo "Error: Circular dependency detected!"
exit 1
fi
- name: Run unit tests
run: cd api && npm run test:ci

View File

@@ -1,83 +0,0 @@
name: Docker Compose Build on Tag
# The workflow is triggered when a tag is pushed
on:
push:
tags:
- "*"
jobs:
build:
runs-on: ubuntu-latest
steps:
# Check out the repository
- name: Checkout
uses: actions/checkout@v4
# Set up Docker
- name: Set up Docker
uses: docker/setup-buildx-action@v3
# Set up QEMU for cross-platform builds
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
# Log in to GitHub Container Registry
- name: Log in to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Prepare Docker Build
- name: Build Docker images
run: |
cp .env.example .env
# Tag and push librechat-api
- name: Docker metadata for librechat-api
id: meta-librechat-api
uses: docker/metadata-action@v5
with:
images: |
ghcr.io/${{ github.repository_owner }}/librechat-api
tags: |
type=raw,value=latest
type=semver,pattern={{version}}
type=semver,pattern={{major}}
type=semver,pattern={{major}}.{{minor}}
- name: Build and push librechat-api
uses: docker/build-push-action@v5
with:
file: Dockerfile.multi
context: .
push: true
tags: ${{ steps.meta-librechat-api.outputs.tags }}
platforms: linux/amd64,linux/arm64
target: api-build
# Tag and push librechat
- name: Docker metadata for librechat
id: meta-librechat
uses: docker/metadata-action@v5
with:
images: |
ghcr.io/${{ github.repository_owner }}/librechat
tags: |
type=raw,value=latest
type=semver,pattern={{version}}
type=semver,pattern={{major}}
type=semver,pattern={{major}}.{{minor}}
- name: Build and push librechat
uses: docker/build-push-action@v5
with:
file: Dockerfile
context: .
push: true
tags: ${{ steps.meta-librechat.outputs.tags }}
platforms: linux/amd64,linux/arm64
target: node

View File

@@ -13,14 +13,27 @@ on:
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- target: api-build
file: Dockerfile.multi
image_name: librechat-dev-api
- target: node
file: Dockerfile
image_name: librechat-dev
steps:
# Check out the repository
- name: Checkout
uses: actions/checkout@v4
# Set up Docker
- name: Set up Docker
# Set up QEMU
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
# Set up Docker Buildx
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
# Log in to GitHub Container Registry
@@ -38,35 +51,22 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
# Build Docker images
- name: Build Docker images
# Prepare the environment
- name: Prepare environment
run: |
cp .env.example .env
docker build -f Dockerfile.multi --target api-build -t librechat-dev-api .
docker build -f Dockerfile -t librechat-dev .
# Tag and push the images to GitHub Container Registry
- name: Tag and push images to GHCR
run: |
docker tag librechat-dev-api:latest ghcr.io/${{ github.repository_owner }}/librechat-dev-api:${{ github.sha }}
docker push ghcr.io/${{ github.repository_owner }}/librechat-dev-api:${{ github.sha }}
docker tag librechat-dev-api:latest ghcr.io/${{ github.repository_owner }}/librechat-dev-api:latest
docker push ghcr.io/${{ github.repository_owner }}/librechat-dev-api:latest
docker tag librechat-dev:latest ghcr.io/${{ github.repository_owner }}/librechat-dev:${{ github.sha }}
docker push ghcr.io/${{ github.repository_owner }}/librechat-dev:${{ github.sha }}
docker tag librechat-dev:latest ghcr.io/${{ github.repository_owner }}/librechat-dev:latest
docker push ghcr.io/${{ github.repository_owner }}/librechat-dev:latest
# Tag and push the images to Docker Hub
- name: Tag and push images to Docker Hub
run: |
docker tag librechat-dev-api:latest ${{ secrets.DOCKERHUB_USERNAME }}/librechat-dev-api:${{ github.sha }}
docker push ${{ secrets.DOCKERHUB_USERNAME }}/librechat-dev-api:${{ github.sha }}
docker tag librechat-dev-api:latest ${{ secrets.DOCKERHUB_USERNAME }}/librechat-dev-api:latest
docker push ${{ secrets.DOCKERHUB_USERNAME }}/librechat-dev-api:latest
docker tag librechat-dev:latest ${{ secrets.DOCKERHUB_USERNAME }}/librechat-dev:${{ github.sha }}
docker push ${{ secrets.DOCKERHUB_USERNAME }}/librechat-dev:${{ github.sha }}
docker tag librechat-dev:latest ${{ secrets.DOCKERHUB_USERNAME }}/librechat-dev:latest
docker push ${{ secrets.DOCKERHUB_USERNAME }}/librechat-dev:latest
# Build and push Docker images for each target
- name: Build and push Docker images
uses: docker/build-push-action@v5
with:
context: .
file: ${{ matrix.file }}
push: true
tags: |
ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:${{ github.sha }}
ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:latest
${{ secrets.DOCKERHUB_USERNAME }}/${{ matrix.image_name }}:${{ github.sha }}
${{ secrets.DOCKERHUB_USERNAME }}/${{ matrix.image_name }}:latest
platforms: linux/amd64,linux/arm64
target: ${{ matrix.target }}

View File

@@ -0,0 +1,20 @@
name: 'generate_embeddings'
on:
workflow_dispatch:
push:
branches:
- main
paths:
- 'docs/**'
jobs:
generate:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: supabase/embeddings-generator@v0.0.5
with:
supabase-url: ${{ secrets.SUPABASE_URL }}
supabase-service-role-key: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }}
openai-key: ${{ secrets.OPENAI_DOC_EMBEDDINGS_KEY }}
docs-root-path: 'docs'

View File

@@ -1,88 +0,0 @@
name: Docker Compose Build Latest Tag (Manual Dispatch)
# The workflow is manually triggered
on:
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
steps:
# Check out the repository
- name: Checkout
uses: actions/checkout@v4
# Fetch all tags and set the latest tag
- name: Fetch tags and set the latest tag
run: |
git fetch --tags
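# `git rev-list --tags --max-count=1` resolves the most recently tagged commit;
# `git describe --tags` on that commit then yields the tag name itself.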
echo "LATEST_TAG=$(git describe --tags `git rev-list --tags --max-count=1`)" >> $GITHUB_ENV
# Set up Docker
- name: Set up Docker
uses: docker/setup-buildx-action@v3
# Set up QEMU
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
# Log in to GitHub Container Registry
- name: Log in to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Prepare Docker Build
- name: Build Docker images
run: cp .env.example .env
# Docker metadata for librechat-api
- name: Docker metadata for librechat-api
id: meta-librechat-api
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ github.repository_owner }}/librechat-api
tags: |
type=raw,value=${{ env.LATEST_TAG }},enable=true
type=raw,value=latest,enable=true
type=semver,pattern={{version}}
type=semver,pattern={{major}}
type=semver,pattern={{major}}.{{minor}}
# Build and push librechat-api
- name: Build and push librechat-api
uses: docker/build-push-action@v5
with:
file: Dockerfile.multi
context: .
push: true
tags: ${{ steps.meta-librechat-api.outputs.tags }}
platforms: linux/amd64,linux/arm64
target: api-build
# Docker metadata for librechat
- name: Docker metadata for librechat
id: meta-librechat
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ github.repository_owner }}/librechat
tags: |
type=raw,value=${{ env.LATEST_TAG }},enable=true
type=raw,value=latest,enable=true
type=semver,pattern={{version}}
type=semver,pattern={{major}}
type=semver,pattern={{major}}.{{minor}}
# Build and push librechat
- name: Build and push librechat
uses: docker/build-push-action@v5
with:
file: Dockerfile
context: .
push: true
tags: ${{ steps.meta-librechat.outputs.tags }}
platforms: linux/amd64,linux/arm64
target: node

View File

@@ -0,0 +1,69 @@
name: Docker Compose Build Latest Main Image Tag (Manual Dispatch)
on:
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- target: api-build
file: Dockerfile.multi
image_name: librechat-api
- target: node
file: Dockerfile
image_name: librechat
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Fetch tags and set the latest tag
run: |
git fetch --tags
echo "LATEST_TAG=$(git describe --tags `git rev-list --tags --max-count=1`)" >> $GITHUB_ENV
# Set up QEMU
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
# Set up Docker Buildx
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
# Log in to GitHub Container Registry
- name: Log in to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Login to Docker Hub
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
# Prepare the environment
- name: Prepare environment
run: |
cp .env.example .env
# Build and push Docker images for each target
- name: Build and push Docker images
uses: docker/build-push-action@v5
with:
context: .
file: ${{ matrix.file }}
push: true
tags: |
ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:${{ env.LATEST_TAG }}
ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:latest
${{ secrets.DOCKERHUB_USERNAME }}/${{ matrix.image_name }}:${{ env.LATEST_TAG }}
${{ secrets.DOCKERHUB_USERNAME }}/${{ matrix.image_name }}:latest
platforms: linux/amd64,linux/arm64
target: ${{ matrix.target }}

.github/workflows/tag-images.yml
View File

@@ -0,0 +1,67 @@
name: Docker Images Build on Tag
on:
push:
tags:
- '*'
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- target: api-build
file: Dockerfile.multi
image_name: librechat-api
- target: node
file: Dockerfile
image_name: librechat
steps:
# Check out the repository
- name: Checkout
uses: actions/checkout@v4
# Set up QEMU
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
# Set up Docker Buildx
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
# Log in to GitHub Container Registry
- name: Log in to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Login to Docker Hub
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
# Prepare the environment
- name: Prepare environment
run: |
cp .env.example .env
# Build and push Docker images for each target
- name: Build and push Docker images
uses: docker/build-push-action@v5
with:
context: .
file: ${{ matrix.file }}
push: true
tags: |
ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:${{ github.ref_name }}
ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:latest
${{ secrets.DOCKERHUB_USERNAME }}/${{ matrix.image_name }}:${{ github.ref_name }}
${{ secrets.DOCKERHUB_USERNAME }}/${{ matrix.image_name }}:latest
platforms: linux/amd64,linux/arm64
target: ${{ matrix.target }}

.gitignore
View File

@@ -50,6 +50,7 @@ bower_components/
#config file
librechat.yaml
librechat.yml
# Environment
.npmrc
@@ -74,6 +75,7 @@ src/style - official.css
config.local.ts
**/storageState.json
junit.xml
**/.venv/
# docker override file
docker-compose.override.yaml
@@ -91,4 +93,7 @@ auth.json
!client/src/components/Nav/SettingsTabs/Data/
# User uploads
uploads/
# owner
release/

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env sh
set -e
. "$(dirname -- "$0")/_/husky.sh"
[ -n "$CI" ] && exit 0

View File

@@ -1,15 +1,26 @@
# Base node image
FROM node:18-alpine AS node
# v0.7.0
COPY . /app
# Base node image
FROM node:18-alpine3.18 AS node
RUN apk add g++ make py3-pip
RUN npm install -g node-gyp
RUN apk --no-cache add curl
RUN mkdir -p /app && chown node:node /app
WORKDIR /app
USER node
COPY --chown=node:node . .
# Allow mounting of these files, which have no default
# values.
RUN touch .env
# Install all deps - Install curl for health check
RUN apk --no-cache add curl && \
npm ci
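# Raise npm's retry budget (5 fetch retries with 15s-600s timeouts) so image
# builds survive transient registry failures.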
RUN npm config set fetch-retry-maxtimeout 600000
RUN npm config set fetch-retries 5
RUN npm config set fetch-retry-mintimeout 15000
RUN npm install --no-audit
# React client build
ENV NODE_OPTIONS="--max-old-space-size=2048"

View File

@@ -1,3 +1,5 @@
# v0.7.0
# Build API, Client and Data Provider
FROM node:20-alpine AS base
@@ -24,6 +26,8 @@ FROM data-provider-build AS api-build
WORKDIR /app/api
COPY api/package*.json ./
COPY api/ ./
# Copy helper scripts
COPY config/ ./
# Copy data-provider to API's node_modules
RUN mkdir -p /app/api/node_modules/librechat-data-provider/
RUN cp -R /app/packages/data-provider/* /app/api/node_modules/librechat-data-provider/

View File

@@ -1,10 +1,10 @@
<p align="center">
<a href="https://docs.librechat.ai">
<a href="https://librechat.ai">
<img src="docs/assets/LibreChat.svg" height="256">
</a>
<a href="https://docs.librechat.ai">
<h1 align="center">LibreChat</h1>
</a>
<h1 align="center">
<a href="https://librechat.ai">LibreChat</a>
</h1>
</p>
<p align="center">
@@ -39,30 +39,36 @@
</p>
# 📃 Features
- 🖥️ UI matching ChatGPT, including Dark mode, Streaming, and 11-2023 updates
- 💬 Multimodal Chat:
- Upload and analyze images with GPT-4 and Gemini Vision 📸
- More filetypes and Assistants API integration in Active Development 🚧
- 🌎 Multilingual UI:
- English, 中文, Deutsch, Español, Français, Italiano, Polski, Português Brasileiro,
- Русский, 日本語, Svenska, 한국어, Tiếng Việt, 繁體中文, العربية, Türkçe, Nederlands
- 🤖 AI model selection: OpenAI API, Azure, BingAI, ChatGPT, Google Vertex AI, Anthropic (Claude), Plugins
- 💾 Create, Save, & Share Custom Presets
- 🔄 Edit, Resubmit, and Continue messages with conversation branching
- 📤 Export conversations as screenshots, markdown, text, json.
- 🔍 Search all messages/conversations
- 🔌 Plugins, including web access, image generation with DALL-E-3 and more
- 👥 Multi-User, Secure Authentication with Moderation and Token spend tools
- ⚙️ Configure Proxy, Reverse Proxy, Docker, many Deployment options, and completely Open-Source
- 🖥️ UI matching ChatGPT, including Dark mode, Streaming, and latest updates
- 💬 Multimodal Chat:
- Upload and analyze images with Claude 3, GPT-4, and Gemini Vision 📸
- Chat with Files using Custom Endpoints, OpenAI, Azure, Anthropic, & Google. 🗃️
- Advanced Agents with Files, Code Interpreter, Tools, and API Actions 🔦
- Available through the [OpenAI Assistants API](https://platform.openai.com/docs/assistants/overview) 🌤️
- Non-OpenAI Agents in Active Development 🚧
- 🌎 Multilingual UI:
- English, 中文, Deutsch, Español, Français, Italiano, Polski, Português Brasileiro,
- Русский, 日本語, Svenska, 한국어, Tiếng Việt, 繁體中文, العربية, Türkçe, Nederlands, עברית
- 🤖 AI model selection: OpenAI, Azure OpenAI, BingAI, ChatGPT, Google Vertex AI, Anthropic (Claude), Plugins, Assistants API (including Azure Assistants)
- 💾 Create, Save, & Share Custom Presets
- 🔄 Edit, Resubmit, and Continue messages with conversation branching
- 📤 Export conversations as screenshots, markdown, text, json.
- 🔍 Search all messages/conversations
- 🔌 Plugins, including web access, image generation with DALL-E-3 and more
- 👥 Multi-User, Secure Authentication with Moderation and Token spend tools
- ⚙️ Configure Proxy, Reverse Proxy, Docker, & many Deployment options
- 📖 Completely Open-Source & Built in Public
- 🧑‍🤝‍🧑 Community-driven development, support, and feedback
[For a thorough review of our features, see our docs here](https://docs.librechat.ai/features/plugins/introduction.html) 📚
## 🪶 All-In-One AI Conversations with LibreChat
LibreChat brings together the future of assistant AIs with the revolutionary technology of OpenAI's ChatGPT. Celebrating the original styling, LibreChat gives you the ability to integrate multiple AI models. It also integrates and enhances original client features such as conversation and message search, prompt templates and plugins.
With LibreChat, you no longer need to opt for ChatGPT Plus and can instead use free or pay-per-call APIs. We welcome contributions, cloning, and forking to enhance the capabilities of this advanced chatbot platform.
<!-- https://github.com/danny-avila/LibreChat/assets/110412045/c1eb0c0f-41f6-4335-b982-84b278b53d59 -->
[![Watch the video](https://img.youtube.com/vi/pNIOs1ovsXw/maxresdefault.jpg)](https://youtu.be/pNIOs1ovsXw)
@@ -71,11 +77,13 @@ Click on the thumbnail to open the video☝
---
## 📚 Documentation
For more information on how to use our advanced features, install and configure our software, and access our guidelines and tutorials, please check out our documentation at [docs.librechat.ai](https://docs.librechat.ai)
---
## 📝 Changelog
Keep up with the latest updates by visiting the releases page - [Releases](https://github.com/danny-avila/LibreChat/releases)
**⚠️ [Breaking Changes](docs/general_info/breaking_changes.md)**
@@ -96,14 +104,15 @@ Please consult the breaking changes before updating.
---
## ✨ Contributions
Contributions, suggestions, bug reports and fixes are welcome!
For new features, components, or extensions, please open an issue and discuss before sending a PR.
---
💖 This project exists in its current state thanks to all the people who contribute
---
## 💖 This project exists in its current state thanks to all the people who contribute
<a href="https://github.com/danny-avila/LibreChat/graphs/contributors">
<img src="https://contrib.rocks/image?repo=danny-avila/LibreChat" />
</a>

View File

@@ -1,6 +1,19 @@
const Anthropic = require('@anthropic-ai/sdk');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const { getResponseSender, EModelEndpoint } = require('librechat-data-provider');
const {
getResponseSender,
EModelEndpoint,
validateVisionModel,
} = require('librechat-data-provider');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const {
titleFunctionPrompt,
parseTitleFromPrompt,
truncateText,
formatMessage,
createContextHandlers,
} = require('./prompts');
const spendTokens = require('~/models/spendTokens');
const { getModelMaxTokens } = require('~/utils');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
@@ -10,12 +23,20 @@ const AI_PROMPT = '\n\nAssistant:';
const tokenizersCache = {};
/** Helper function to introduce a delay before retrying */
function delayBeforeRetry(attempts, baseDelay = 1000) {
return new Promise((resolve) => setTimeout(resolve, baseDelay * attempts));
}
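The wait grows linearly with the attempt count. A minimal sketch, assuming the `baseDelay` of 350 ms that `sendCompletion` passes below:
function retryDelayMs(attempts, baseDelay = 350) {
  // attempt 1 -> 350 ms, attempt 2 -> 700 ms; a third failure throws instead
  return baseDelay * attempts;
}
console.log([1, 2].map((n) => retryDelayMs(n))); // [ 350, 700 ]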
class AnthropicClient extends BaseClient {
constructor(apiKey, options = {}) {
super(apiKey, options);
this.apiKey = apiKey || process.env.ANTHROPIC_API_KEY;
this.userLabel = HUMAN_PROMPT;
this.assistantLabel = AI_PROMPT;
this.contextStrategy = options.contextStrategy
? options.contextStrategy.toLowerCase()
: 'discard';
this.setOptions(options);
}
@@ -47,6 +68,12 @@ class AnthropicClient extends BaseClient {
stop: modelOptions.stop, // no stop method for now
};
this.isClaude3 = this.modelOptions.model.includes('claude-3');
this.useMessages = this.isClaude3 || !!this.options.attachments;
this.defaultVisionModel = this.options.visionModel ?? 'claude-3-sonnet-20240229';
this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));
this.maxContextTokens =
getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ?? 100000;
this.maxResponseTokens = this.modelOptions.maxOutputTokens || 1500;
@@ -87,7 +114,12 @@ class AnthropicClient extends BaseClient {
return this;
}
/**
* Get the initialized Anthropic client.
* @returns {Anthropic} The Anthropic client instance.
*/
getClient() {
/** @type {Anthropic.default.RequestOptions} */
const options = {
apiKey: this.apiKey,
};
@@ -99,6 +131,75 @@ class AnthropicClient extends BaseClient {
return new Anthropic(options);
}
getTokenCountForResponse(response) {
return this.getTokenCountForMessage({
role: 'assistant',
content: response.text,
});
}
/**
*
* Checks if the model is a vision model based on request attachments and sets the appropriate options:
* - Sets `this.modelOptions.model` to the default vision model (`claude-3-sonnet-20240229` unless configured) if the request is a vision request.
* - Sets `this.isVisionModel` to `true` if vision request.
* @param {MongoFile[]} attachments
*/
checkVisionRequest(attachments) {
const availableModels = this.options.modelsConfig?.[EModelEndpoint.anthropic];
this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels });
const visionModelAvailable = availableModels?.includes(this.defaultVisionModel);
if (
attachments &&
attachments.some((file) => file?.type && file?.type?.includes('image')) &&
visionModelAvailable &&
!this.isVisionModel
) {
this.modelOptions.model = this.defaultVisionModel;
this.isVisionModel = true;
}
}
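To make the branch above concrete, a hypothetical walk-through:
// Assume modelOptions.model = 'claude-2.1', the attachments include an image,
// and availableModels contains 'claude-3-sonnet-20240229' (the default vision model):
// the model is swapped to 'claude-3-sonnet-20240229' and isVisionModel becomes true.
// With no image attachment, or the vision model unavailable, nothing changes.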
/**
* Calculate the token cost in tokens for an image based on its dimensions and detail level.
*
* For reference, see: https://docs.anthropic.com/claude/docs/vision#image-costs
*
* @param {Object} image - The image object.
* @param {number} image.width - The width of the image.
* @param {number} image.height - The height of the image.
* @returns {number} The calculated token cost measured by tokens.
*
*/
calculateImageTokenCost({ width, height }) {
return Math.ceil((width * height) / 750);
}
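A worked example of the formula, which follows Anthropic's published approximation of tokens ≈ (width × height) / 750:
// A 1092x1092 image, the size used as an example in Anthropic's vision docs:
const imageTokens = Math.ceil((1092 * 1092) / 750);
console.log(imageTokens); // 1590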
async addImageURLs(message, attachments) {
const { files, image_urls } = await encodeAndFormat(
this.options.req,
attachments,
EModelEndpoint.anthropic,
);
message.image_urls = image_urls.length ? image_urls : undefined;
return files;
}
async recordTokenUsage({ promptTokens, completionTokens, model, context = 'message' }) {
await spendTokens(
{
context,
user: this.user,
conversationId: this.conversationId,
model: model ?? this.modelOptions.model,
endpointTokenConfig: this.options.endpointTokenConfig,
},
{ promptTokens, completionTokens },
);
}
async buildMessages(messages, parentMessageId) {
const orderedMessages = this.constructor.getMessagesForConversation({
messages,
@@ -107,28 +208,145 @@ class AnthropicClient extends BaseClient {
logger.debug('[AnthropicClient] orderedMessages', { orderedMessages, parentMessageId });
const formattedMessages = orderedMessages.map((message) => ({
author: message.isCreatedByUser ? this.userLabel : this.assistantLabel,
content: message?.content ?? message.text,
}));
if (this.options.attachments) {
const attachments = await this.options.attachments;
const images = attachments.filter((file) => file.type.includes('image'));
if (images.length && !this.isVisionModel) {
throw new Error('Images are only supported with the Claude 3 family of models');
}
const latestMessage = orderedMessages[orderedMessages.length - 1];
if (this.message_file_map) {
this.message_file_map[latestMessage.messageId] = attachments;
} else {
this.message_file_map = {
[latestMessage.messageId]: attachments,
};
}
const files = await this.addImageURLs(latestMessage, attachments);
this.options.attachments = files;
}
if (this.message_file_map) {
this.contextHandlers = createContextHandlers(
this.options.req,
orderedMessages[orderedMessages.length - 1].text,
);
}
const formattedMessages = orderedMessages.map((message, i) => {
const formattedMessage = this.useMessages
? formatMessage({
message,
endpoint: EModelEndpoint.anthropic,
})
: {
author: message.isCreatedByUser ? this.userLabel : this.assistantLabel,
content: message?.content ?? message.text,
};
const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
/* If tokens were never counted, or, is a Vision request and the message has files, count again */
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
}
/* If message has files, calculate image token cost */
if (this.message_file_map && this.message_file_map[message.messageId]) {
const attachments = this.message_file_map[message.messageId];
for (const file of attachments) {
if (file.embedded) {
this.contextHandlers?.processFile(file);
continue;
}
orderedMessages[i].tokenCount += this.calculateImageTokenCost({
width: file.width,
height: file.height,
});
}
}
formattedMessage.tokenCount = orderedMessages[i].tokenCount;
return formattedMessage;
});
if (this.contextHandlers) {
this.augmentedPrompt = await this.contextHandlers.createContext();
this.options.promptPrefix = this.augmentedPrompt + (this.options.promptPrefix ?? '');
}
let { context: messagesInWindow, remainingContextTokens } =
await this.getMessagesWithinTokenLimit(formattedMessages);
const tokenCountMap = orderedMessages
.slice(orderedMessages.length - messagesInWindow.length)
.reduce((map, message, index) => {
const { messageId } = message;
if (!messageId) {
return map;
}
map[messageId] = orderedMessages[index].tokenCount;
return map;
}, {});
logger.debug('[AnthropicClient]', {
messagesInWindow: messagesInWindow.length,
remainingContextTokens,
});
let lastAuthor = '';
let groupedMessages = [];
for (let message of formattedMessages) {
for (let i = 0; i < messagesInWindow.length; i++) {
const message = messagesInWindow[i];
const author = message.role ?? message.author;
// If last author is not same as current author, add to new group
if (lastAuthor !== message.author) {
groupedMessages.push({
author: message.author,
if (lastAuthor !== author) {
const newMessage = {
content: [message.content],
});
lastAuthor = message.author;
};
if (message.role) {
newMessage.role = message.role;
} else {
newMessage.author = message.author;
}
groupedMessages.push(newMessage);
lastAuthor = author;
// If same author, append content to the last group
} else {
groupedMessages[groupedMessages.length - 1].content.push(message.content);
}
}
groupedMessages = groupedMessages.map((msg, i) => {
const isLast = i === groupedMessages.length - 1;
if (msg.content.length === 1) {
const content = msg.content[0];
return {
...msg,
// reason: final assistant content cannot end with trailing whitespace
content:
isLast && this.useMessages && msg.role === 'assistant' && typeof content === 'string'
? content?.trim()
: content,
};
}
if (!this.useMessages && msg.tokenCount) {
delete msg.tokenCount;
}
return msg;
});
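To illustrate the grouping and the mapping pass above, a sketch with hypothetical messages:
// Window:   [{ role: 'user', content: 'a' }, { role: 'user', content: 'b' },
//            { role: 'assistant', content: 'c ' }]
// Grouped:  [{ role: 'user', content: ['a', 'b'] }, { role: 'assistant', content: ['c '] }]
// Mapped:   [{ role: 'user', content: ['a', 'b'] }, { role: 'assistant', content: 'c' }]
// Single-element content is unwrapped, and a final assistant string is trimmed,
// since the API rejects trailing whitespace in the last assistant turn.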
let identityPrefix = '';
if (this.options.userLabel) {
identityPrefix = `\nHuman's name: ${this.options.userLabel}`;
@@ -154,9 +372,10 @@ class AnthropicClient extends BaseClient {
// Prompt AI to respond, empty if last message was from AI
let isEdited = lastAuthor === this.assistantLabel;
const promptSuffix = isEdited ? '' : `${promptPrefix}${this.assistantLabel}\n`;
let currentTokenCount = isEdited
? this.getTokenCount(promptPrefix)
: this.getTokenCount(promptSuffix);
let currentTokenCount =
isEdited || this.useMessages
? this.getTokenCount(promptPrefix)
: this.getTokenCount(promptSuffix);
let promptBody = '';
const maxTokenCount = this.maxPromptTokens;
@@ -224,7 +443,69 @@ class AnthropicClient extends BaseClient {
return true;
};
await buildPromptBody();
const messagesPayload = [];
const buildMessagesPayload = async () => {
let canContinue = true;
if (promptPrefix) {
this.systemMessage = promptPrefix;
}
while (currentTokenCount < maxTokenCount && groupedMessages.length > 0 && canContinue) {
const message = groupedMessages.pop();
let tokenCountForMessage = message.tokenCount ?? this.getTokenCountForMessage(message);
const newTokenCount = currentTokenCount + tokenCountForMessage;
const exceededMaxCount = newTokenCount > maxTokenCount;
if (exceededMaxCount && messagesPayload.length === 0) {
throw new Error(
`Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
);
} else if (exceededMaxCount) {
canContinue = false;
break;
}
delete message.tokenCount;
messagesPayload.unshift(message);
currentTokenCount = newTokenCount;
// Switch off isEdited after using it once
if (isEdited && message.role === 'assistant') {
isEdited = false;
}
// Wait for next tick to avoid blocking the event loop
await new Promise((resolve) => setImmediate(resolve));
}
};
const processTokens = () => {
// Add 2 tokens for metadata after all messages have been counted.
currentTokenCount += 2;
// Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave `this.maxTokens` tokens for the response.
this.modelOptions.maxOutputTokens = Math.min(
this.maxContextTokens - currentTokenCount,
this.maxResponseTokens,
);
};
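A worked pass through `processTokens`, with assumed numbers:
// Assume maxContextTokens = 100000 (the fallback above), maxResponseTokens = 1500,
// and a prompt counted at 98900 tokens; the metadata allowance brings it to 98902:
console.log(Math.min(100000 - (98900 + 2), 1500)); // 1098 tokens left for the response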
if (this.modelOptions.model.startsWith('claude-3')) {
await buildMessagesPayload();
processTokens();
return {
prompt: messagesPayload,
context: messagesInWindow,
promptTokens: currentTokenCount,
tokenCountMap,
};
} else {
await buildPromptBody();
processTokens();
}
if (nextMessage.remove) {
promptBody = promptBody.replace(nextMessage.messageString, '');
@@ -234,22 +515,26 @@ class AnthropicClient extends BaseClient {
let prompt = `${promptBody}${promptSuffix}`;
// Add 2 tokens for metadata after all messages have been counted.
currentTokenCount += 2;
// Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave `this.maxTokens` tokens for the response.
this.modelOptions.maxOutputTokens = Math.min(
this.maxContextTokens - currentTokenCount,
this.maxResponseTokens,
);
return { prompt, context };
return { prompt, context, promptTokens: currentTokenCount, tokenCountMap };
}
getCompletion() {
logger.debug('AnthropicClient doesn\'t use getCompletion (all handled in sendCompletion)');
}
/**
* Creates a message or completion response using the Anthropic client.
* @param {Anthropic} client - The Anthropic client instance.
* @param {Anthropic.default.MessageCreateParams | Anthropic.default.CompletionCreateParams} options - The options for the message or completion.
* @param {boolean} useMessages - Whether to use messages or completions. Defaults to `this.useMessages`.
* @returns {Promise<Anthropic.default.Message | Anthropic.default.Completion>} The response from the Anthropic client.
*/
async createResponse(client, options, useMessages) {
return useMessages ?? this.useMessages
? await client.messages.create(options)
: await client.completions.create(options);
}
async sendCompletion(payload, { onProgress, abortController }) {
if (!abortController) {
abortController = new AbortController();
@@ -279,36 +564,88 @@ class AnthropicClient extends BaseClient {
topP: top_p,
topK: top_k,
} = this.modelOptions;
const requestOptions = {
prompt: payload,
model,
stream: stream || true,
max_tokens_to_sample: maxOutputTokens || 1500,
stop_sequences,
temperature,
metadata,
top_p,
top_k,
};
logger.debug('[AnthropicClient]', { ...requestOptions });
const response = await client.completions.create(requestOptions);
signal.addEventListener('abort', () => {
logger.debug('[AnthropicClient] message aborted!');
response.controller.abort();
});
for await (const completion of response) {
// Uncomment to debug message stream
// logger.debug(completion);
text += completion.completion;
onProgress(completion.completion);
if (this.useMessages) {
requestOptions.messages = payload;
requestOptions.max_tokens = maxOutputTokens || 1500;
} else {
requestOptions.prompt = payload;
requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
}
signal.removeEventListener('abort', () => {
logger.debug('[AnthropicClient] message aborted!');
response.controller.abort();
});
if (this.systemMessage) {
requestOptions.system = this.systemMessage;
}
logger.debug('[AnthropicClient]', { ...requestOptions });
const handleChunk = (currentChunk) => {
if (currentChunk) {
text += currentChunk;
onProgress(currentChunk);
}
};
const maxRetries = 3;
async function processResponse() {
let attempts = 0;
while (attempts < maxRetries) {
let response;
try {
response = await this.createResponse(client, requestOptions);
signal.addEventListener('abort', () => {
logger.debug('[AnthropicClient] message aborted!');
if (response.controller?.abort) {
response.controller.abort();
}
});
for await (const completion of response) {
// Handle each completion as before
if (completion?.delta?.text) {
handleChunk(completion.delta.text);
} else if (completion.completion) {
handleChunk(completion.completion);
}
}
// Successful processing, exit loop
break;
} catch (error) {
attempts += 1;
logger.warn(
`User: ${this.user} | Anthropic Request ${attempts} failed: ${error.message}`,
);
if (attempts < maxRetries) {
await delayBeforeRetry(attempts, 350);
} else {
throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`);
}
} finally {
signal.removeEventListener('abort', () => {
logger.debug('[AnthropicClient] message aborted!');
if (response.controller?.abort) {
response.controller.abort();
}
});
}
}
}
await processResponse.bind(this)();
return text.trim();
}
@@ -317,6 +654,7 @@ class AnthropicClient extends BaseClient {
return {
promptPrefix: this.options.promptPrefix,
modelLabel: this.options.modelLabel,
resendFiles: this.options.resendFiles,
...this.modelOptions,
};
}
@@ -342,6 +680,78 @@ class AnthropicClient extends BaseClient {
getTokenCount(text) {
return this.gptEncoder.encode(text, 'all').length;
}
/**
* Generates a concise title for a conversation based on the user's input text and response.
* Involves sending a chat completion request with specific instructions for title generation.
*
* This function capitalizes on [Anthropic's function calling training](https://docs.anthropic.com/claude/docs/functions-external-tools).
*
* @param {Object} params - The parameters for the conversation title generation.
* @param {string} params.text - The user's input.
* @param {string} [params.responseText=''] - The AI's immediate response to the user.
*
* @returns {Promise<string | 'New Chat'>} A promise that resolves to the generated conversation title.
* In case of failure, it will return the default title, "New Chat".
*/
async titleConvo({ text, responseText = '' }) {
let title = 'New Chat';
const convo = `<initial_message>
${truncateText(text)}
</initial_message>
<response>
${JSON.stringify(truncateText(responseText))}
</response>`;
const { ANTHROPIC_TITLE_MODEL } = process.env ?? {};
const model = this.options.titleModel ?? ANTHROPIC_TITLE_MODEL ?? 'claude-3-haiku-20240307';
const system = titleFunctionPrompt;
const titleChatCompletion = async () => {
const content = `<conversation_context>
${convo}
</conversation_context>
Please generate a title for this conversation.`;
const titleMessage = { role: 'user', content };
const requestOptions = {
model,
temperature: 0.3,
max_tokens: 1024,
system,
stop_sequences: ['\n\nHuman:', '\n\nAssistant', '</function_calls>'],
messages: [titleMessage],
};
try {
const response = await this.createResponse(this.getClient(), requestOptions, true);
let promptTokens = response?.usage?.input_tokens;
let completionTokens = response?.usage?.output_tokens;
if (!promptTokens) {
promptTokens = this.getTokenCountForMessage(titleMessage);
promptTokens += this.getTokenCountForMessage({ role: 'system', content: system });
}
if (!completionTokens) {
completionTokens = this.getTokenCountForMessage(response.content[0]);
}
await this.recordTokenUsage({
model,
promptTokens,
completionTokens,
context: 'title',
});
const text = response.content[0].text;
title = parseTitleFromPrompt(text);
} catch (e) {
logger.error('[AnthropicClient] There was an issue generating the title', e);
}
};
await titleChatCompletion();
logger.debug('[AnthropicClient] Convo Title: ' + title);
return title;
}
}
module.exports = AnthropicClient;

View File

@@ -3,6 +3,7 @@ const { supportsBalanceCheck, Constants } = require('librechat-data-provider');
const { getConvo, getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
const checkBalance = require('~/models/checkBalance');
const { getFiles } = require('~/models/File');
const TextStream = require('./TextStream');
const { logger } = require('~/config');
@@ -46,10 +47,6 @@ class BaseClient {
logger.debug('`[BaseClient] recordTokenUsage` not implemented.', response);
}
async addPreviousAttachments(messages) {
return messages;
}
async recordTokenUsage({ promptTokens, completionTokens }) {
logger.debug('`[BaseClient] recordTokenUsage` not implemented.', {
promptTokens,
@@ -447,6 +444,8 @@ class BaseClient {
}
const completion = await this.sendCompletion(payload, opts);
this.abortController.requestCompleted = true;
const responseMessage = {
messageId: responseMessageId,
conversationId,
@@ -457,6 +456,7 @@ class BaseClient {
sender: this.sender,
text: addSpaceIfNeeded(generation) + completion,
promptTokens,
...(this.metadata ?? {}),
};
if (
@@ -681,6 +681,54 @@ class BaseClient {
return await this.sendCompletion(payload, opts);
}
/**
*
* @param {TMessage[]} _messages
* @returns {Promise<TMessage[]>}
*/
async addPreviousAttachments(_messages) {
if (!this.options.resendFiles) {
return _messages;
}
/**
*
* @param {TMessage} message
*/
const processMessage = async (message) => {
if (!this.message_file_map) {
/** @type {Record<string, MongoFile[]>} */
this.message_file_map = {};
}
const fileIds = message.files.map((file) => file.file_id);
const files = await getFiles({
file_id: { $in: fileIds },
});
await this.addImageURLs(message, files);
this.message_file_map[message.messageId] = files;
return message;
};
const promises = [];
for (const message of _messages) {
if (!message.files) {
promises.push(message);
continue;
}
promises.push(processMessage(message));
}
const messages = await Promise.all(promises);
this.checkVisionRequest(Object.values(this.message_file_map ?? {}).flat());
return messages;
}
}
module.exports = BaseClient;

View File

@@ -1,9 +1,16 @@
const crypto = require('crypto');
const Keyv = require('keyv');
const crypto = require('crypto');
const {
EModelEndpoint,
resolveHeaders,
mapModelToAzureConfig,
} = require('librechat-data-provider');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
const { Agent, ProxyAgent } = require('undici');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
const { extractBaseURL, constructAzureURL, genAzureChatCompletion } = require('~/utils');
const CHATGPT_MODEL = 'gpt-3.5-turbo';
const tokenizersCache = {};
@@ -144,7 +151,8 @@ class ChatGPTClient extends BaseClient {
if (!abortController) {
abortController = new AbortController();
}
const modelOptions = { ...this.modelOptions };
let modelOptions = { ...this.modelOptions };
if (typeof onProgress === 'function') {
modelOptions.stream = true;
}
@@ -159,56 +167,171 @@ class ChatGPTClient extends BaseClient {
}
const { debug } = this.options;
const url = this.completionsUrl;
let baseURL = this.completionsUrl;
if (debug) {
console.debug();
console.debug(url);
console.debug(baseURL);
console.debug(modelOptions);
console.debug();
}
if (this.azure || this.options.azure) {
// Azure does not accept `model` in the body, so we need to remove it.
delete modelOptions.model;
}
const opts = {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(modelOptions),
dispatcher: new Agent({
bodyTimeout: 0,
headersTimeout: 0,
}),
};
if (this.apiKey && this.options.azure) {
opts.headers['api-key'] = this.apiKey;
if (this.isVisionModel) {
modelOptions.max_tokens = 4000;
}
/** @type {TAzureConfig | undefined} */
const azureConfig = this.options?.req?.app?.locals?.[EModelEndpoint.azureOpenAI];
const isAzure = this.azure || this.options.azure;
if (
(isAzure && this.isVisionModel && azureConfig) ||
(azureConfig && this.isVisionModel && this.options.endpoint === EModelEndpoint.azureOpenAI)
) {
const { modelGroupMap, groupMap } = azureConfig;
const {
azureOptions,
baseURL,
headers = {},
serverless,
} = mapModelToAzureConfig({
modelName: modelOptions.model,
modelGroupMap,
groupMap,
});
opts.headers = resolveHeaders(headers);
this.langchainProxy = extractBaseURL(baseURL);
this.apiKey = azureOptions.azureOpenAIApiKey;
const groupName = modelGroupMap[modelOptions.model].group;
this.options.addParams = azureConfig.groupMap[groupName].addParams;
this.options.dropParams = azureConfig.groupMap[groupName].dropParams;
// Note: `forcePrompt` not re-assigned as only chat models are vision models
this.azure = !serverless && azureOptions;
this.azureEndpoint =
!serverless && genAzureChatCompletion(this.azure, modelOptions.model, this);
}
if (this.options.headers) {
opts.headers = { ...opts.headers, ...this.options.headers };
}
if (isAzure) {
// Azure does not accept `model` in the body, so we need to remove it.
delete modelOptions.model;
baseURL = this.langchainProxy
? constructAzureURL({
baseURL: this.langchainProxy,
azureOptions: this.azure,
})
: this.azureEndpoint.split(/(?<!\/)\/(chat|completion)\//)[0];
if (this.options.forcePrompt) {
baseURL += '/completions';
} else {
baseURL += '/chat/completions';
}
opts.defaultQuery = { 'api-version': this.azure.azureOpenAIApiVersion };
opts.headers = { ...opts.headers, 'api-key': this.apiKey };
} else if (this.apiKey) {
opts.headers.Authorization = `Bearer ${this.apiKey}`;
}
if (process.env.OPENAI_ORGANIZATION) {
opts.headers['OpenAI-Organization'] = process.env.OPENAI_ORGANIZATION;
}
if (this.useOpenRouter) {
opts.headers['HTTP-Referer'] = 'https://librechat.ai';
opts.headers['X-Title'] = 'LibreChat';
}
if (this.options.headers) {
opts.headers = { ...opts.headers, ...this.options.headers };
}
if (this.options.proxy) {
opts.dispatcher = new ProxyAgent(this.options.proxy);
}
/* hacky fixes for Mistral AI API:
- Re-orders system message to the top of the messages payload, as not allowed anywhere else
- If there is only one message and it's a system message, change the role to user
*/
if (baseURL.includes('https://api.mistral.ai/v1') && modelOptions.messages) {
const { messages } = modelOptions;
const systemMessageIndex = messages.findIndex((msg) => msg.role === 'system');
if (systemMessageIndex > 0) {
const [systemMessage] = messages.splice(systemMessageIndex, 1);
messages.unshift(systemMessage);
}
modelOptions.messages = messages;
if (messages.length === 1 && messages[0].role === 'system') {
modelOptions.messages[0].role = 'user';
}
}
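A before/after sketch of that re-ordering, with hypothetical payloads:
// [{ role: 'user', content: 'hi' }, { role: 'system', content: 'be terse' }]
//   becomes [{ role: 'system', content: 'be terse' }, { role: 'user', content: 'hi' }]
// A payload that is only a system message:
// [{ role: 'system', content: 'be terse' }] becomes [{ role: 'user', content: 'be terse' }]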
if (this.options.addParams && typeof this.options.addParams === 'object') {
modelOptions = {
...modelOptions,
...this.options.addParams,
};
logger.debug('[ChatGPTClient] chatCompletion: added params', {
addParams: this.options.addParams,
modelOptions,
});
}
if (this.options.dropParams && Array.isArray(this.options.dropParams)) {
this.options.dropParams.forEach((param) => {
delete modelOptions[param];
});
logger.debug('[ChatGPTClient] chatCompletion: dropped params', {
dropParams: this.options.dropParams,
modelOptions,
});
}
if (baseURL.includes('v1') && !baseURL.includes('/completions') && !this.isChatCompletion) {
baseURL = baseURL.split('v1')[0] + 'v1/completions';
} else if (
baseURL.includes('v1') &&
!baseURL.includes('/chat/completions') &&
this.isChatCompletion
) {
baseURL = baseURL.split('v1')[0] + 'v1/chat/completions';
}
const BASE_URL = new URL(baseURL);
if (opts.defaultQuery) {
Object.entries(opts.defaultQuery).forEach(([key, value]) => {
BASE_URL.searchParams.append(key, value);
});
delete opts.defaultQuery;
}
const completionsURL = BASE_URL.toString();
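For example, with hypothetical values, a v1 base URL is normalized for chat completions and any `defaultQuery` entries become query parameters:
// baseURL = 'https://api.example.com/v1', isChatCompletion = true
//   -> 'https://api.example.com/v1/chat/completions'
// opts.defaultQuery = { 'api-version': '2024-02-15-preview' } (the Azure case)
//   -> 'https://api.example.com/v1/chat/completions?api-version=2024-02-15-preview'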
opts.body = JSON.stringify(modelOptions);
if (modelOptions.stream) {
// eslint-disable-next-line no-async-promise-executor
return new Promise(async (resolve, reject) => {
try {
let done = false;
await fetchEventSource(url, {
await fetchEventSource(completionsURL, {
...opts,
signal: abortController.signal,
async onopen(response) {
@@ -236,7 +359,6 @@ class ChatGPTClient extends BaseClient {
// workaround for private API not sending [DONE] event
if (!done) {
onProgress('[DONE]');
abortController.abort();
resolve();
}
},
@@ -249,14 +371,13 @@ class ChatGPTClient extends BaseClient {
},
onmessage(message) {
if (debug) {
// console.debug(message);
console.debug(message);
}
if (!message.data || message.event === 'ping') {
return;
}
if (message.data === '[DONE]') {
onProgress('[DONE]');
abortController.abort();
resolve();
done = true;
return;
@@ -269,7 +390,7 @@ class ChatGPTClient extends BaseClient {
}
});
}
const response = await fetch(url, {
const response = await fetch(completionsURL, {
...opts,
signal: abortController.signal,
});

View File

@@ -4,7 +4,6 @@ const { GoogleVertexAI } = require('langchain/llms/googlevertexai');
const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
const { ChatGoogleVertexAI } = require('langchain/chat_models/googlevertexai');
const { AIMessage, HumanMessage, SystemMessage } = require('langchain/schema');
const { encodeAndFormat } = require('~/server/services/Files/images');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const {
validateVisionModel,
@@ -13,8 +12,9 @@ const {
EModelEndpoint,
AuthKeys,
} = require('librechat-data-provider');
const { encodeAndFormat } = require('~/server/services/Files/images');
const { formatMessage, createContextHandlers } = require('./prompts');
const { getModelMaxTokens } = require('~/utils');
const { formatMessage } = require('./prompts');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
@@ -124,18 +124,11 @@ class GoogleClient extends BaseClient {
// stop: modelOptions.stop // no stop method for now
};
if (this.options.attachments) {
this.modelOptions.model = 'gemini-pro-vision';
}
this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));
// TODO: as of 12/14/23, only gemini models are "Generative AI" models provided by Google
this.isGenerativeModel = this.modelOptions.model.includes('gemini');
this.isVisionModel = validateVisionModel(this.modelOptions.model);
const { isGenerativeModel } = this;
if (this.isVisionModel && !this.options.attachments) {
this.modelOptions.model = 'gemini-pro';
this.isVisionModel = false;
}
this.isChatModel = !isGenerativeModel && this.modelOptions.model.includes('chat');
const { isChatModel } = this;
this.isTextModel =
@@ -220,6 +213,33 @@ class GoogleClient extends BaseClient {
return this;
}
/**
*
* Checks if the model is a vision model based on request attachments and sets the appropriate options:
* @param {MongoFile[]} attachments
*/
checkVisionRequest(attachments) {
/* Validate vision request */
this.defaultVisionModel = this.options.visionModel ?? 'gemini-pro-vision';
const availableModels = this.options.modelsConfig?.[EModelEndpoint.google];
this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels });
if (
attachments &&
attachments.some((file) => file?.type && file?.type?.includes('image')) &&
availableModels?.includes(this.defaultVisionModel) &&
!this.isVisionModel
) {
this.modelOptions.model = this.defaultVisionModel;
this.isVisionModel = true;
}
if (this.isVisionModel && !attachments) {
this.modelOptions.model = 'gemini-pro';
this.isVisionModel = false;
}
}
formatMessages() {
return ((message) => ({
author: message?.author ?? (message.isCreatedByUser ? this.userLabel : this.modelLabel),
@@ -227,18 +247,45 @@ class GoogleClient extends BaseClient {
})).bind(this);
}
async buildVisionMessages(messages = [], parentMessageId) {
const { prompt } = await this.buildMessagesPrompt(messages, parentMessageId);
const attachments = await this.options.attachments;
/**
*
* Adds image URLs to the message object and returns the files
*
* @param {TMessage[]} messages
* @param {MongoFile[]} files
* @returns {Promise<MongoFile[]>}
*/
async addImageURLs(message, attachments) {
const { files, image_urls } = await encodeAndFormat(
this.options.req,
attachments.filter((file) => file.type.includes('image')),
attachments,
EModelEndpoint.google,
);
message.image_urls = image_urls.length ? image_urls : undefined;
return files;
}
async buildVisionMessages(messages = [], parentMessageId) {
const attachments = await this.options.attachments;
const latestMessage = { ...messages[messages.length - 1] };
this.contextHandlers = createContextHandlers(this.options.req, latestMessage.text);
if (this.contextHandlers) {
for (const file of attachments) {
if (file.embedded) {
this.contextHandlers?.processFile(file);
continue;
}
}
this.augmentedPrompt = await this.contextHandlers.createContext();
this.options.promptPrefix = this.augmentedPrompt + this.options.promptPrefix;
}
const { prompt } = await this.buildMessagesPrompt(messages, parentMessageId);
const files = await this.addImageURLs(latestMessage, attachments);
latestMessage.image_urls = image_urls;
this.options.attachments = files;
latestMessage.text = prompt;
@@ -265,7 +312,7 @@ class GoogleClient extends BaseClient {
);
}
if (this.options.attachments) {
if (this.options.attachments && this.isGenerativeModel) {
return this.buildVisionMessages(messages, parentMessageId);
}

View File

@@ -1,10 +1,13 @@
const OpenAI = require('openai');
const { HttpsProxyAgent } = require('https-proxy-agent');
const {
ImageDetail,
EModelEndpoint,
resolveHeaders,
ImageDetailCost,
getResponseSender,
validateVisionModel,
ImageDetailCost,
ImageDetail,
mapModelToAzureConfig,
} = require('librechat-data-provider');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const {
@@ -13,14 +16,13 @@ const {
getModelMaxTokens,
genAzureChatCompletion,
} = require('~/utils');
const { truncateText, formatMessage, createContextHandlers, CUT_OFF_PROMPT } = require('./prompts');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { truncateText, formatMessage, CUT_OFF_PROMPT } = require('./prompts');
const { handleOpenAIErrors } = require('./tools/util');
const spendTokens = require('~/models/spendTokens');
const { createLLM, RunManager } = require('./llm');
const ChatGPTClient = require('./ChatGPTClient');
const { isEnabled } = require('~/server/utils');
const { getFiles } = require('~/models/File');
const { summaryBuffer } = require('./memory');
const { runTitleChain } = require('./chains');
const { tokenSplit } = require('./document');
@@ -45,6 +47,7 @@ class OpenAIClient extends BaseClient {
/** @type {AzureOptions} */
this.azure = options.azure || false;
this.setOptions(options);
this.metadata = {};
}
// TODO: PluginsClient calls this 3x, unneeded
@@ -88,7 +91,12 @@ class OpenAIClient extends BaseClient {
};
}
this.checkVisionRequest(this.options.attachments);
this.defaultVisionModel = this.options.visionModel ?? 'gpt-4-vision-preview';
if (typeof this.options.attachments?.then === 'function') {
this.options.attachments.then((attachments) => this.checkVisionRequest(attachments));
} else {
this.checkVisionRequest(this.options.attachments);
}
const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
if (OPENROUTER_API_KEY && !this.azure) {
@@ -219,13 +227,20 @@ class OpenAIClient extends BaseClient {
* - Sets `this.modelOptions.model` to `gpt-4-vision-preview` if the request is a vision request.
* - Sets `this.isVisionModel` to `true` if vision request.
* - Deletes `this.modelOptions.stop` if vision request.
* @param {Array<Promise<MongoFile[]> | MongoFile[]> | Record<string, MongoFile[]>} attachments
* @param {MongoFile[]} attachments
*/
checkVisionRequest(attachments) {
this.isVisionModel = validateVisionModel(this.modelOptions.model);
const availableModels = this.options.modelsConfig?.[this.options.endpoint];
this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels });
if (attachments && !this.isVisionModel) {
this.modelOptions.model = 'gpt-4-vision-preview';
const visionModelAvailable = availableModels?.includes(this.defaultVisionModel);
if (
attachments &&
attachments.some((file) => file?.type && file?.type?.includes('image')) &&
visionModelAvailable &&
!this.isVisionModel
) {
this.modelOptions.model = this.defaultVisionModel;
this.isVisionModel = true;
}
@@ -360,7 +375,7 @@ class OpenAIClient extends BaseClient {
return {
chatGptLabel: this.options.chatGptLabel,
promptPrefix: this.options.promptPrefix,
resendImages: this.options.resendImages,
resendFiles: this.options.resendFiles,
imageDetail: this.options.imageDetail,
...this.modelOptions,
};
@@ -374,54 +389,6 @@ class OpenAIClient extends BaseClient {
};
}
/**
*
* @param {TMessage[]} _messages
* @returns {TMessage[]}
*/
async addPreviousAttachments(_messages) {
if (!this.options.resendImages) {
return _messages;
}
/**
*
* @param {TMessage} message
*/
const processMessage = async (message) => {
if (!this.message_file_map) {
/** @type {Record<string, MongoFile[]> */
this.message_file_map = {};
}
const fileIds = message.files.map((file) => file.file_id);
const files = await getFiles({
file_id: { $in: fileIds },
});
await this.addImageURLs(message, files);
this.message_file_map[message.messageId] = files;
return message;
};
const promises = [];
for (const message of _messages) {
if (!message.files) {
promises.push(message);
continue;
}
promises.push(processMessage(message));
}
const messages = await Promise.all(promises);
this.checkVisionRequest(this.message_file_map);
return messages;
}
/**
*
* Adds image URLs to the message object and returns the files
@@ -432,8 +399,7 @@ class OpenAIClient extends BaseClient {
*/
async addImageURLs(message, attachments) {
const { files, image_urls } = await encodeAndFormat(this.options.req, attachments);
message.image_urls = image_urls;
message.image_urls = image_urls.length ? image_urls : undefined;
return files;
}
@@ -461,23 +427,9 @@ class OpenAIClient extends BaseClient {
let promptTokens;
promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();
if (promptPrefix) {
promptPrefix = `Instructions:\n${promptPrefix}`;
instructions = {
role: 'system',
name: 'instructions',
content: promptPrefix,
};
if (this.contextStrategy) {
instructions.tokenCount = this.getTokenCountForMessage(instructions);
}
}
if (this.options.attachments) {
const attachments = (await this.options.attachments).filter((file) =>
file.type.includes('image'),
);
const attachments = await this.options.attachments;
if (this.message_file_map) {
this.message_file_map[orderedMessages[orderedMessages.length - 1].messageId] = attachments;
@@ -495,6 +447,13 @@ class OpenAIClient extends BaseClient {
this.options.attachments = files;
}
if (this.message_file_map) {
this.contextHandlers = createContextHandlers(
this.options.req,
orderedMessages[orderedMessages.length - 1].text,
);
}
const formattedMessages = orderedMessages.map((message, i) => {
const formattedMessage = formatMessage({
message,
@@ -513,6 +472,11 @@ class OpenAIClient extends BaseClient {
if (this.message_file_map && this.message_file_map[message.messageId]) {
const attachments = this.message_file_map[message.messageId];
for (const file of attachments) {
if (file.embedded) {
this.contextHandlers?.processFile(file);
continue;
}
orderedMessages[i].tokenCount += this.calculateImageTokenCost({
width: file.width,
height: file.height,
@@ -524,6 +488,24 @@ class OpenAIClient extends BaseClient {
return formattedMessage;
});
if (this.contextHandlers) {
this.augmentedPrompt = await this.contextHandlers.createContext();
promptPrefix = this.augmentedPrompt + promptPrefix;
}
if (promptPrefix) {
promptPrefix = `Instructions:\n${promptPrefix.trim()}`;
instructions = {
role: 'system',
name: 'instructions',
content: promptPrefix,
};
if (this.contextStrategy) {
instructions.tokenCount = this.getTokenCountForMessage(instructions);
}
}
// TODO: need to handle interleaving instructions better
if (this.contextStrategy) {
({ payload, tokenCountMap, promptTokens, messages } = await this.handleContextStrategy({
@@ -557,7 +539,7 @@ class OpenAIClient extends BaseClient {
let streamResult = null;
this.modelOptions.user = this.user;
const invalidBaseUrl = this.completionsUrl && extractBaseURL(this.completionsUrl) === null;
const useOldMethod = !!(invalidBaseUrl || !this.isChatCompletion);
const useOldMethod = !!(invalidBaseUrl || !this.isChatCompletion || typeof Bun !== 'undefined');
if (typeof opts.onProgress === 'function' && useOldMethod) {
await this.getCompletion(
payload,
@@ -597,7 +579,6 @@ class OpenAIClient extends BaseClient {
} else if (typeof opts.onProgress === 'function' || this.options.useChatCompletion) {
reply = await this.chatCompletion({
payload,
clientOptions: opts,
onProgress: opts.onProgress,
abortController: opts.abortController,
});
@@ -617,11 +598,11 @@ class OpenAIClient extends BaseClient {
}
}
if (streamResult && typeof opts.addMetadata === 'function') {
if (streamResult) {
const { finish_reason } = streamResult.choices[0];
opts.addMetadata({ finish_reason });
this.metadata = { finish_reason };
}
return reply.trim();
return (reply ?? '').trim();
}
initializeLLM({
@@ -665,6 +646,16 @@ class OpenAIClient extends BaseClient {
};
}
const { headers } = this.options;
if (headers && typeof headers === 'object' && !Array.isArray(headers)) {
configOptions.baseOptions = {
headers: resolveHeaders({
...headers,
...configOptions?.baseOptions?.headers,
}),
};
}
if (this.options.proxy) {
configOptions.httpAgent = new HttpsProxyAgent(this.options.proxy);
configOptions.httpsAgent = new HttpsProxyAgent(this.options.proxy);
@@ -725,6 +716,39 @@ class OpenAIClient extends BaseClient {
max_tokens: 16,
};
/** @type {TAzureConfig | undefined} */
const azureConfig = this.options?.req?.app?.locals?.[EModelEndpoint.azureOpenAI];
const resetTitleOptions = !!(
(this.azure && azureConfig) ||
(azureConfig && this.options.endpoint === EModelEndpoint.azureOpenAI)
);
if (resetTitleOptions) {
const { modelGroupMap, groupMap } = azureConfig;
const {
azureOptions,
baseURL,
headers = {},
serverless,
} = mapModelToAzureConfig({
modelName: modelOptions.model,
modelGroupMap,
groupMap,
});
this.options.headers = resolveHeaders(headers);
this.options.reverseProxyUrl = baseURL ?? null;
this.langchainProxy = extractBaseURL(this.options.reverseProxyUrl);
this.apiKey = azureOptions.azureOpenAIApiKey;
const groupName = modelGroupMap[modelOptions.model].group;
this.options.addParams = azureConfig.groupMap[groupName].addParams;
this.options.dropParams = azureConfig.groupMap[groupName].dropParams;
this.options.forcePrompt = azureConfig.groupMap[groupName].forcePrompt;
this.azure = !serverless && azureOptions;
}
const titleChatCompletion = async () => {
modelOptions.model = model;
@@ -901,7 +925,6 @@ ${convo}
}
async recordTokenUsage({ promptTokens, completionTokens }) {
logger.debug('[OpenAIClient] recordTokenUsage:', { promptTokens, completionTokens });
await spendTokens(
{
user: this.user,
@@ -921,7 +944,7 @@ ${convo}
});
}
async chatCompletion({ payload, onProgress, clientOptions, abortController = null }) {
async chatCompletion({ payload, onProgress, abortController = null }) {
let error = null;
const errorCallback = (err) => (error = err);
let intermediateReply = '';
@@ -942,15 +965,6 @@ ${convo}
}
const baseURL = extractBaseURL(this.completionsUrl);
// let { messages: _msgsToLog, ...modelOptionsToLog } = modelOptions;
// if (modelOptionsToLog.messages) {
// _msgsToLog = modelOptionsToLog.messages.map((msg) => {
// let { content, ...rest } = msg;
// if (content)
// return { ...rest, content: truncateText(content) };
// });
// }
logger.debug('[OpenAIClient] chatCompletion', { baseURL, modelOptions });
const opts = {
baseURL,
@@ -975,6 +989,38 @@ ${convo}
modelOptions.max_tokens = 4000;
}
/** @type {TAzureConfig | undefined} */
const azureConfig = this.options?.req?.app?.locals?.[EModelEndpoint.azureOpenAI];
if (
(this.azure && this.isVisionModel && azureConfig) ||
(azureConfig && this.isVisionModel && this.options.endpoint === EModelEndpoint.azureOpenAI)
) {
const { modelGroupMap, groupMap } = azureConfig;
const {
azureOptions,
baseURL,
headers = {},
serverless,
} = mapModelToAzureConfig({
modelName: modelOptions.model,
modelGroupMap,
groupMap,
});
opts.defaultHeaders = resolveHeaders(headers);
this.langchainProxy = extractBaseURL(baseURL);
this.apiKey = azureOptions.azureOpenAIApiKey;
const groupName = modelGroupMap[modelOptions.model].group;
this.options.addParams = azureConfig.groupMap[groupName].addParams;
this.options.dropParams = azureConfig.groupMap[groupName].dropParams;
// Note: `forcePrompt` not re-assigned as only chat models are vision models
this.azure = !serverless && azureOptions;
this.azureEndpoint =
!serverless && genAzureChatCompletion(this.azure, modelOptions.model, this);
}
if (this.azure || this.options.azure) {
// Azure does not accept `model` in the body, so we need to remove it.
delete modelOptions.model;
@@ -982,9 +1028,10 @@ ${convo}
opts.baseURL = this.langchainProxy
? constructAzureURL({
baseURL: this.langchainProxy,
azure: this.azure,
azureOptions: this.azure,
})
: this.azureEndpoint.split(/\/(chat|completion)/)[0];
: this.azureEndpoint.split(/(?<!\/)\/(chat|completion)\//)[0];
opts.defaultQuery = { 'api-version': this.azure.azureOpenAIApiVersion };
opts.defaultHeaders = { ...opts.defaultHeaders, 'api-key': this.apiKey };
}
@@ -994,6 +1041,7 @@ ${convo}
}
let chatCompletion;
/** @type {OpenAI} */
const openai = new OpenAI({
apiKey: this.apiKey,
...opts,
@@ -1025,12 +1073,20 @@ ${convo}
...modelOptions,
...this.options.addParams,
};
logger.debug('[OpenAIClient] chatCompletion: added params', {
addParams: this.options.addParams,
modelOptions,
});
}
if (this.options.dropParams && Array.isArray(this.options.dropParams)) {
this.options.dropParams.forEach((param) => {
delete modelOptions[param];
});
logger.debug('[OpenAIClient] chatCompletion: dropped params', {
dropParams: this.options.dropParams,
modelOptions,
});
}
let UnexpectedRoleError = false;
@@ -1046,6 +1102,16 @@ ${convo}
.on('error', (err) => {
handleOpenAIErrors(err, errorCallback, 'stream');
})
.on('finalChatCompletion', (finalChatCompletion) => {
const finalMessage = finalChatCompletion?.choices?.[0]?.message;
if (finalMessage && finalMessage?.role !== 'assistant') {
finalChatCompletion.choices[0].message.role = 'assistant';
}
if (finalMessage && !finalMessage?.content?.trim()) {
finalChatCompletion.choices[0].message.content = intermediateReply;
}
})
.on('finalMessage', (message) => {
if (message?.role !== 'assistant') {
stream.messages.push({ role: 'assistant', content: intermediateReply });
@@ -1091,12 +1157,20 @@ ${convo}
}
const { message, finish_reason } = chatCompletion.choices[0];
if (chatCompletion && typeof clientOptions.addMetadata === 'function') {
clientOptions.addMetadata({ finish_reason });
if (chatCompletion) {
this.metadata = { finish_reason };
}
logger.debug('[OpenAIClient] chatCompletion response', chatCompletion);
if (!message?.content?.trim() && intermediateReply.length) {
logger.debug(
'[OpenAIClient] chatCompletion: using intermediateReply due to empty message.content',
{ intermediateReply },
);
return intermediateReply;
}
return message.content;
} catch (err) {
if (
@@ -1109,6 +1183,9 @@ ${convo}
err?.message?.includes(
'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
) ||
err?.message?.includes(
'stream ended without producing a ChatCompletionMessage with role=assistant',
) ||
err?.message?.includes('The server had an error processing your request') ||
err?.message?.includes('missing finish_reason') ||
err?.message?.includes('missing role') ||

View File

@@ -31,10 +31,6 @@ class PluginsClient extends OpenAIClient {
super.setOptions(options);
if (this.functionsAgent && this.agentOptions.model && !this.useOpenRouter && !this.azure) {
this.agentOptions.model = this.getFunctionModelName(this.agentOptions.model);
}
this.isGpt3 = this.modelOptions?.model?.includes('gpt-3');
if (this.options.reverseProxyUrl) {

View File

@@ -55,16 +55,18 @@ function createLLM({
}
if (azure && configOptions.basePath) {
configOptions.basePath = constructAzureURL({
const azureURL = constructAzureURL({
baseURL: configOptions.basePath,
azure: azureOptions,
azureOptions,
});
azureOptions.azureOpenAIBasePath = azureURL.split(
`/${azureOptions.azureOpenAIApiDeploymentName}`,
)[0];
}
return new ChatOpenAI(
{
streaming,
verbose: true,
credentials,
configuration,
...azureOptions,

View File

@@ -0,0 +1,159 @@
const axios = require('axios');
const { isEnabled } = require('~/server/utils');
const { logger } = require('~/config');
const footer = `Use the context as your learned knowledge to better answer the user.
In your response, remember to follow these guidelines:
- If you don't know the answer, simply say that you don't know.
- If you are unsure how to answer, ask for clarification.
- Avoid mentioning that you obtained the information from the context.
Answer appropriately in the user's language.
`;
function createContextHandlers(req, userMessageContent) {
if (!process.env.RAG_API_URL) {
return;
}
const queryPromises = [];
const processedFiles = [];
const processedIds = new Set();
const jwtToken = req.headers.authorization.split(' ')[1];
const useFullContext = isEnabled(process.env.RAG_USE_FULL_CONTEXT);
const query = async (file) => {
if (useFullContext) {
return axios.get(`${process.env.RAG_API_URL}/documents/${file.file_id}/context`, {
headers: {
Authorization: `Bearer ${jwtToken}`,
},
});
}
return axios.post(
`${process.env.RAG_API_URL}/query`,
{
file_id: file.file_id,
query: userMessageContent,
k: 4,
},
{
headers: {
Authorization: `Bearer ${jwtToken}`,
'Content-Type': 'application/json',
},
},
);
};
const processFile = async (file) => {
if (file.embedded && !processedIds.has(file.file_id)) {
try {
const promise = query(file);
queryPromises.push(promise);
processedFiles.push(file);
processedIds.add(file.file_id);
} catch (error) {
logger.error(`Error processing file ${file.filename}:`, error);
}
}
};
const createContext = async () => {
try {
if (!queryPromises.length || !processedFiles.length) {
return '';
}
const oneFile = processedFiles.length === 1;
const header = `The user has attached ${oneFile ? 'a' : processedFiles.length} file${
!oneFile ? 's' : ''
} to the conversation:`;
const files = `${
oneFile
? ''
: `
<files>`
}${processedFiles
.map(
(file) => `
<file>
<filename>${file.filename}</filename>
<type>${file.type}</type>
</file>`,
)
.join('')}${
oneFile
? ''
: `
</files>`
}`;
const resolvedQueries = await Promise.all(queryPromises);
const context = resolvedQueries
.map((queryResult, index) => {
const file = processedFiles[index];
let contextItems = queryResult.data;
const generateContext = (currentContext) =>
`
<file>
<filename>${file.filename}</filename>
<context>${currentContext}
</context>
</file>`;
if (useFullContext) {
return generateContext(`\n${contextItems}`);
}
contextItems = queryResult.data
.map((item) => {
const pageContent = item[0].page_content;
return `
<contextItem>
<![CDATA[${pageContent?.trim()}]]>
</contextItem>`;
})
.join('');
return generateContext(contextItems);
})
.join('');
if (useFullContext) {
const prompt = `${header}
${context}
${footer}`;
return prompt;
}
const prompt = `${header}
${files}
A semantic search was executed with the user's message as the query, retrieving the following context inside <context></context> XML tags.
<context>${context}
</context>
${footer}`;
return prompt;
} catch (error) {
logger.error('Error creating context:', error);
throw error;
}
};
return {
processFile,
createContext,
};
}
module.exports = createContextHandlers;
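For orientation, a minimal sketch of how a client is expected to drive these handlers, mirroring the OpenAIClient hunks above (the `req` and `files` values are assumptions; without `RAG_API_URL` set, `createContextHandlers` returns `undefined`):

const createContextHandlers = require('./createContextHandlers'); // path assumed

async function buildAugmentedPrompt(req, userText, files) {
  const handlers = createContextHandlers(req, userText);
  if (!handlers) {
    return ''; // RAG_API_URL not configured
  }
  for (const file of files) {
    handlers.processFile(file); // queues one RAG query per embedded file
  }
  // resolves the queued queries and wraps the results in the <context> XML prompt
  return handlers.createContext();
}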

View File

@@ -0,0 +1,34 @@
/**
* Generates a prompt instructing the model to describe an image in detail, tailored to different types of visual content.
* @param {boolean} pluralized - Whether to pluralize the prompt for multiple images.
* @returns {string} - The generated vision prompt.
*/
const createVisionPrompt = (pluralized = false) => {
return `Please describe the image${
pluralized ? 's' : ''
} in detail, covering relevant aspects such as:
For photographs, illustrations, or artwork:
- The main subject(s) and their appearance, positioning, and actions
- The setting, background, and any notable objects or elements
- Colors, lighting, and overall mood or atmosphere
- Any interesting details, textures, or patterns
- The style, technique, or medium used (if discernible)
For screenshots or images containing text:
- The content and purpose of the text
- The layout, formatting, and organization of the information
- Any notable visual elements, such as logos, icons, or graphics
- The overall context or message conveyed by the screenshot
For graphs, charts, or data visualizations:
- The type of graph or chart (e.g., bar graph, line chart, pie chart)
- The variables being compared or analyzed
- Any trends, patterns, or outliers in the data
- The axis labels, scales, and units of measurement
- The title, legend, and any additional context provided
Be as specific and descriptive as possible while maintaining clarity and concision.`;
};
module.exports = createVisionPrompt;
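Usage is a single boolean switch; for example:

const createVisionPrompt = require('./createVisionPrompt'); // path assumed
const visionPrompt = createVisionPrompt(true); // pluralized: "Please describe the images..."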

View File

@@ -1,3 +1,4 @@
const { EModelEndpoint } = require('librechat-data-provider');
const { HumanMessage, AIMessage, SystemMessage } = require('langchain/schema');
/**
@@ -7,10 +8,16 @@ const { HumanMessage, AIMessage, SystemMessage } = require('langchain/schema');
* @param {Object} params.message - The message object to format.
* @param {string} [params.message.role] - The role of the message sender (must be 'user').
* @param {string} [params.message.content] - The text content of the message.
* @param {EModelEndpoint} [params.endpoint] - Identifier for specific endpoint handling
* @param {Array<string>} [params.image_urls] - The image_urls to attach to the message.
* @returns {(Object)} - The formatted message.
*/
const formatVisionMessage = ({ message, image_urls }) => {
const formatVisionMessage = ({ message, image_urls, endpoint }) => {
if (endpoint === EModelEndpoint.anthropic) {
message.content = [...image_urls, { type: 'text', text: message.content }];
return message;
}
message.content = [{ type: 'text', text: message.content }, ...image_urls];
return message;
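The only behavioral difference is element order, since Anthropic expects image parts before the text part. A sketch of the resulting `content` arrays (the image part shape is an assumption):

// Given message.content = 'Describe this' and a single image part:
// anthropic -> [imagePart, { type: 'text', text: 'Describe this' }]
// default   -> [{ type: 'text', text: 'Describe this' }, imagePart]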
@@ -29,10 +36,11 @@ const formatVisionMessage = ({ message, image_urls }) => {
* @param {Array<string>} [params.message.image_urls] - The image_urls attached to the message for Vision API.
* @param {string} [params.userName] - The name of the user.
* @param {string} [params.assistantName] - The name of the assistant.
* @param {string} [params.endpoint] - Identifier for specific endpoint handling
* @param {boolean} [params.langChain=false] - Whether to return a LangChain message object.
* @returns {(Object|HumanMessage|AIMessage|SystemMessage)} - The formatted message.
*/
const formatMessage = ({ message, userName, assistantName, langChain = false }) => {
const formatMessage = ({ message, userName, assistantName, endpoint, langChain = false }) => {
let { role: _role, _name, sender, text, content: _content, lc_id } = message;
if (lc_id && lc_id[2] && !langChain) {
const roleMapping = {
@@ -51,7 +59,11 @@ const formatMessage = ({ message, userName, assistantName, langChain = false })
const { image_urls } = message;
if (Array.isArray(image_urls) && image_urls.length > 0 && role === 'user') {
return formatVisionMessage({ message: formattedMessage, image_urls: message.image_urls });
return formatVisionMessage({
message: formattedMessage,
image_urls: message.image_urls,
endpoint,
});
}
if (_name) {

View File

@@ -4,6 +4,8 @@ const handleInputs = require('./handleInputs');
const instructions = require('./instructions');
const titlePrompts = require('./titlePrompts');
const truncateText = require('./truncateText');
const createVisionPrompt = require('./createVisionPrompt');
const createContextHandlers = require('./createContextHandlers');
module.exports = {
...formatMessages,
@@ -12,4 +14,6 @@ module.exports = {
...instructions,
...titlePrompts,
truncateText,
createVisionPrompt,
createContextHandlers,
};

View File

@@ -27,7 +27,60 @@ ${convo}`,
return titlePrompt;
};
const titleFunctionPrompt = `In this environment you have access to a set of tools you can use to generate the conversation title.
You may call them like this:
<function_calls>
<invoke>
<tool_name>$TOOL_NAME</tool_name>
<parameters>
<$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
...
</parameters>
</invoke>
</function_calls>
Here are the tools available:
<tools>
<tool_description>
<tool_name>submit_title</tool_name>
<description>
Submit a brief title in the conversation's language, following the parameter description closely.
</description>
<parameters>
<parameter>
<name>title</name>
<type>string</type>
<description>A concise, 5-word-or-less title for the conversation, using its same language, with no punctuation. Apply title case conventions appropriate for the language. For English, use AP Stylebook Title Case. Never directly mention the language name or the word "title"</description>
</parameter>
</parameters>
</tool_description>
</tools>`;
/**
* Parses titles from title functions based on the provided prompt.
* @param {string} prompt - The prompt containing the title function.
* @returns {string} The parsed title. "New Chat" if no title is found.
*/
function parseTitleFromPrompt(prompt) {
const titleRegex = /<title>(.+?)<\/title>/;
const titleMatch = prompt.match(titleRegex);
if (titleMatch && titleMatch[1]) {
const title = titleMatch[1].trim();
// // Capitalize the first letter of each word; Note: unnecessary due to title case prompting
// const capitalizedTitle = title.replace(/\b\w/g, (char) => char.toUpperCase());
return title;
}
return 'New Chat';
}
module.exports = {
langPrompt,
createTitlePrompt,
titleFunctionPrompt,
parseTitleFromPrompt,
};
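For example:

const { parseTitleFromPrompt } = require('./titlePrompts'); // path assumed

parseTitleFromPrompt('<title>Polar Ice Cap Decline</title>'); // => 'Polar Ice Cap Decline'
parseTitleFromPrompt('no closing tag here'); // => 'New Chat'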

View File

@@ -1,121 +0,0 @@
const { google } = require('googleapis');
const { Tool } = require('langchain/tools');
const { logger } = require('~/config');
/**
* Represents a tool that allows an agent to use the Google Custom Search API.
* @extends Tool
*/
class GoogleSearchAPI extends Tool {
constructor(fields = {}) {
super();
this.cx = fields.GOOGLE_CSE_ID || this.getCx();
this.apiKey = fields.GOOGLE_API_KEY || this.getApiKey();
this.customSearch = undefined;
}
/**
* The name of the tool.
* @type {string}
*/
name = 'google';
/**
* A description for the agent to use
* @type {string}
*/
description =
'Use the \'google\' tool to retrieve internet search results relevant to your input. The results will return links and snippets of text from the webpages';
description_for_model =
'Use the \'google\' tool to retrieve internet search results relevant to your input. The results will return links and snippets of text from the webpages';
getCx() {
const cx = process.env.GOOGLE_CSE_ID || '';
if (!cx) {
throw new Error('Missing GOOGLE_CSE_ID environment variable.');
}
return cx;
}
getApiKey() {
const apiKey = process.env.GOOGLE_API_KEY || '';
if (!apiKey) {
throw new Error('Missing GOOGLE_API_KEY environment variable.');
}
return apiKey;
}
getCustomSearch() {
if (!this.customSearch) {
const version = 'v1';
this.customSearch = google.customsearch(version);
}
return this.customSearch;
}
resultsToReadableFormat(results) {
let output = 'Results:\n';
results.forEach((resultObj, index) => {
output += `Title: ${resultObj.title}\n`;
output += `Link: ${resultObj.link}\n`;
if (resultObj.snippet) {
output += `Snippet: ${resultObj.snippet}\n`;
}
if (index < results.length - 1) {
output += '\n';
}
});
return output;
}
/**
* Calls the tool with the provided input and returns a promise that resolves with a response from the Google Custom Search API.
* @param {string} input - The input to provide to the API.
* @returns {Promise<String>} A promise that resolves with a response from the Google Custom Search API.
*/
async _call(input) {
try {
const metadataResults = [];
const response = await this.getCustomSearch().cse.list({
q: input,
cx: this.cx,
auth: this.apiKey,
num: 5, // Limit the number of results to 5
});
// return response.data;
// logger.debug(response.data);
if (!response.data.items || response.data.items.length === 0) {
return this.resultsToReadableFormat([
{ title: 'No good Google Search Result was found', link: '' },
]);
}
// const results = response.items.slice(0, numResults);
const results = response.data.items;
for (const result of results) {
const metadataResult = {
title: result.title || '',
link: result.link || '',
};
if (result.snippet) {
metadataResult.snippet = result.snippet;
}
metadataResults.push(metadataResult);
}
return this.resultsToReadableFormat(metadataResults);
} catch (error) {
logger.error('[GoogleSearchAPI]', error);
// throw error;
return 'There was an error searching Google.';
}
}
}
module.exports = GoogleSearchAPI;

View File

@@ -1,7 +1,6 @@
const availableTools = require('./manifest.json');
// Basic Tools
const CodeBrew = require('./CodeBrew');
const GoogleSearchAPI = require('./GoogleSearch');
const WolframAlphaAPI = require('./Wolfram');
const AzureAiSearch = require('./AzureAiSearch');
const OpenAICreateImage = require('./DALL-E');
@@ -16,8 +15,10 @@ const CodeSherpa = require('./structured/CodeSherpa');
const StructuredSD = require('./structured/StableDiffusion');
const StructuredACS = require('./structured/AzureAISearch');
const CodeSherpaTools = require('./structured/CodeSherpaTools');
const GoogleSearchAPI = require('./structured/GoogleSearch');
const StructuredWolfram = require('./structured/Wolfram');
const TavilySearchResults = require('./structured/TavilySearchResults');
const TraversaalSearch = require('./structured/TraversaalSearch');
module.exports = {
availableTools,
@@ -39,4 +40,5 @@ module.exports = {
CodeSherpaTools,
StructuredWolfram,
TavilySearchResults,
TraversaalSearch,
};

View File

@@ -1,4 +1,17 @@
[
{
"name": "Traversaal",
"pluginKey": "traversaal_search",
"description": "Traversaal is a robust search API tailored for LLM Agents. Get an API key here: https://api.traversaal.ai",
"icon": "https://traversaal.ai/favicon.ico",
"authConfig": [
{
"authField": "TRAVERSAAL_API_KEY",
"label": "Traversaal API Key",
"description": "Get your API key here: <a href=\"https://api.traversaal.ai\" target=\"_blank\">https://api.traversaal.ai</a>"
}
]
},
{
"name": "Google",
"pluginKey": "google",
@@ -111,7 +124,7 @@
{
"name": "Tavily Search",
"pluginKey": "tavily_search_results_json",
"description": "Tavily Search is a robust search API tailored specifically for LLM Agents. It seamlessly integrates with diverse data sources to ensure a superior, relevant search experience.",
"description": "Tavily Search is a robust search API tailored for LLM Agents. It seamlessly integrates with diverse data sources to ensure a superior, relevant search experience.",
"icon": "https://tavily.com/favicon.ico",
"authConfig": [
{

View File

@@ -12,14 +12,15 @@ const { logger } = require('~/config');
class DALLE3 extends Tool {
constructor(fields = {}) {
super();
/* Used to initialize the Tool without necessary variables. */
/** @type {boolean} Used to initialize the Tool without necessary variables. */
this.override = fields.override ?? false;
/* Necessary for output to contain all image metadata. */
/** @type {boolean} Necessary for output to contain all image metadata. */
this.returnMetadata = fields.returnMetadata ?? false;
this.userId = fields.userId;
this.fileStrategy = fields.fileStrategy;
if (fields.processFileURL) {
/** @type {processFileURL} Necessary for output to contain all image metadata. */
this.processFileURL = fields.processFileURL.bind(this);
}
@@ -43,6 +44,7 @@ class DALLE3 extends Tool {
config.httpAgent = new HttpsProxyAgent(process.env.PROXY);
}
/** @type {OpenAI} */
this.openai = new OpenAI(config);
this.name = 'dalle';
this.description = `Use DALLE to create images from text descriptions.
@@ -164,13 +166,7 @@ Error Message: ${error.message}`;
});
if (this.returnMetadata) {
this.result = {
file_id: result.file_id,
filename: result.filename,
filepath: result.filepath,
height: result.height,
width: result.width,
};
this.result = result;
} else {
this.result = this.wrapInMarkdown(result.filepath);
}

View File

@@ -0,0 +1,65 @@
const { z } = require('zod');
const { Tool } = require('@langchain/core/tools');
const { getEnvironmentVariable } = require('@langchain/core/utils/env');
class GoogleSearchResults extends Tool {
static lc_name() {
return 'GoogleSearchResults';
}
constructor(fields = {}) {
super(fields);
this.envVarApiKey = 'GOOGLE_API_KEY';
this.envVarSearchEngineId = 'GOOGLE_CSE_ID';
this.override = fields.override ?? false;
this.apiKey = fields.apiKey ?? getEnvironmentVariable(this.envVarApiKey);
this.searchEngineId =
fields.searchEngineId ?? getEnvironmentVariable(this.envVarSearchEngineId);
this.kwargs = fields?.kwargs ?? {};
this.name = 'google';
this.description =
'A search engine optimized for comprehensive, accurate, and trusted results. Useful for when you need to answer questions about current events.';
this.schema = z.object({
query: z.string().min(1).describe('The search query string.'),
max_results: z
.number()
.min(1)
.max(10)
.optional()
.describe('The maximum number of search results to return. Defaults to 5.'),
// Note: Google API has its own parameters for search customization, adjust as needed.
});
}
async _call(input) {
const validationResult = this.schema.safeParse(input);
if (!validationResult.success) {
throw new Error(`Validation failed: ${JSON.stringify(validationResult.error.issues)}`);
}
const { query, max_results = 5 } = validationResult.data;
const response = await fetch(
`https://www.googleapis.com/customsearch/v1?key=${this.apiKey}&cx=${
this.searchEngineId
}&q=${encodeURIComponent(query)}&num=${max_results}`,
{
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
},
);
const json = await response.json();
if (!response.ok) {
throw new Error(`Request failed with status ${response.status}: ${json.error.message}`);
}
return JSON.stringify(json);
}
}
module.exports = GoogleSearchResults;
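A minimal usage sketch, assuming valid credentials are set in the environment (agents would normally invoke the tool through LangChain rather than calling `_call` directly):

const GoogleSearchResults = require('./structured/GoogleSearch'); // path assumed

async function demoGoogleSearch() {
  const tool = new GoogleSearchResults({
    apiKey: process.env.GOOGLE_API_KEY,
    searchEngineId: process.env.GOOGLE_CSE_ID,
  });
  // returns the raw Custom Search JSON as a string
  return tool._call({ query: 'LibreChat RAG API', max_results: 5 });
}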

View File

@@ -4,14 +4,27 @@ const { z } = require('zod');
const path = require('path');
const axios = require('axios');
const sharp = require('sharp');
const { v4: uuidv4 } = require('uuid');
const { StructuredTool } = require('langchain/tools');
const { FileContext } = require('librechat-data-provider');
const paths = require('~/config/paths');
const { logger } = require('~/config');
class StableDiffusionAPI extends StructuredTool {
constructor(fields) {
super();
/* Used to initialize the Tool without necessary variables. */
/** @type {string} User ID */
this.userId = fields.userId;
/** @type {Express.Request | undefined} Express Request object, only provided by ToolService */
this.req = fields.req;
/** @type {boolean} Used to initialize the Tool without necessary variables. */
this.override = fields.override ?? false;
/** @type {boolean} Necessary for output to contain all image metadata. */
this.returnMetadata = fields.returnMetadata ?? false;
if (fields.uploadImageBuffer) {
/** @type {uploadImageBuffer} Necessary for output to contain all image metadata. */
this.uploadImageBuffer = fields.uploadImageBuffer.bind(this);
}
this.name = 'stable-diffusion';
this.url = fields.SD_WEBUI_URL || this.getServerURL();
@@ -47,7 +60,7 @@ class StableDiffusionAPI extends StructuredTool {
getMarkdownImageUrl(imageName) {
const imageUrl = path
.join(this.relativeImageUrl, imageName)
.join(this.relativePath, this.userId, imageName)
.replace(/\\/g, '/')
.replace('public/', '');
return `![generated image](/${imageUrl})`;
@@ -73,46 +86,67 @@ class StableDiffusionAPI extends StructuredTool {
width: 1024,
height: 1024,
};
const response = await axios.post(`${url}/sdapi/v1/txt2img`, payload);
const image = response.data.images[0];
const pngPayload = { image: `data:image/png;base64,${image}` };
const response2 = await axios.post(`${url}/sdapi/v1/png-info`, pngPayload);
const info = response2.data.info;
const generationResponse = await axios.post(`${url}/sdapi/v1/txt2img`, payload);
const image = generationResponse.data.images[0];
// Generate unique name
const imageName = `${Date.now()}.png`;
this.outputPath = path.resolve(
__dirname,
'..',
'..',
'..',
'..',
'..',
'client',
'public',
'images',
);
const appRoot = path.resolve(__dirname, '..', '..', '..', '..', '..', 'client');
this.relativeImageUrl = path.relative(appRoot, this.outputPath);
/** @type {{ height: number, width: number, seed: number, infotexts: string[] }} */
let info = {};
try {
info = JSON.parse(generationResponse.data.info);
} catch (error) {
logger.error('[StableDiffusion] Error while getting image metadata:', error);
}
// Check if directory exists, if not create it
if (!fs.existsSync(this.outputPath)) {
fs.mkdirSync(this.outputPath, { recursive: true });
const file_id = uuidv4();
const imageName = `${file_id}.png`;
const { imageOutput: imageOutputPath, clientPath } = paths;
const filepath = path.join(imageOutputPath, this.userId, imageName);
this.relativePath = path.relative(clientPath, imageOutputPath);
if (!fs.existsSync(path.join(imageOutputPath, this.userId))) {
fs.mkdirSync(path.join(imageOutputPath, this.userId), { recursive: true });
}
try {
const buffer = Buffer.from(image.split(',', 1)[0], 'base64');
if (this.returnMetadata && this.uploadImageBuffer && this.req) {
const file = await this.uploadImageBuffer({
req: this.req,
context: FileContext.image_generation,
resize: false,
metadata: {
buffer,
height: info.height,
width: info.width,
bytes: Buffer.byteLength(buffer),
filename: imageName,
type: 'image/png',
file_id,
},
});
const generationInfo = info.infotexts[0].split('\n').pop();
return {
...file,
prompt,
metadata: {
negative_prompt,
seed: info.seed,
info: generationInfo,
},
};
}
await sharp(buffer)
.withMetadata({
iptcpng: {
parameters: info,
parameters: info.infotexts[0],
},
})
.toFile(this.outputPath + '/' + imageName);
.toFile(filepath);
this.result = this.getMarkdownImageUrl(imageName);
} catch (error) {
logger.error('[StableDiffusion] Error while saving the image:', error);
// this.result = theImageUrl;
}
return this.result;
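With the new per-user path, the returned markdown resolves under the public images route; for example (values assumed, `relativePath` computed from the `paths` module shown later):

// With relativePath = 'public/images', userId = 'user123',
// and imageName = '<uuid>.png':
// getMarkdownImageUrl(imageName)
// => '![generated image](/images/user123/<uuid>.png)'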

View File

@@ -0,0 +1,89 @@
const { z } = require('zod');
const { Tool } = require('@langchain/core/tools');
const { getEnvironmentVariable } = require('@langchain/core/utils/env');
const { logger } = require('~/config');
/**
* Tool for the Traversaal AI search API, Ares.
*/
class TraversaalSearch extends Tool {
static lc_name() {
return 'TraversaalSearch';
}
constructor(fields) {
super(fields);
this.name = 'traversaal_search';
this.description = `An AI search engine optimized for comprehensive, accurate, and trusted results.
Useful for when you need to answer questions about current events. Input should be a search query.`;
this.description_for_model =
'\'Please create a specific sentence for the AI to understand and use as a query to search the web based on the user\'s request. For example, "Find information about the highest mountains in the world." or "Show me the latest news articles about climate change and its impact on polar ice caps."\'';
this.schema = z.object({
query: z
.string()
.describe(
'A properly written sentence to be interpreted by an AI to search the web according to the user\'s request.',
),
});
this.apiKey = fields?.TRAVERSAAL_API_KEY ?? this.getApiKey();
}
getApiKey() {
const apiKey = getEnvironmentVariable('TRAVERSAAL_API_KEY');
if (!apiKey && this.override) {
throw new Error(
'No Traversaal API key found. Either set an environment variable named "TRAVERSAAL_API_KEY" or pass an API key as "apiKey".',
);
}
return apiKey;
}
// eslint-disable-next-line no-unused-vars
async _call({ query }, _runManager) {
const body = {
query: [query],
};
try {
const response = await fetch('https://api-ares.traversaal.ai/live/predict', {
method: 'POST',
headers: {
'content-type': 'application/json',
'x-api-key': this.apiKey,
},
body: JSON.stringify({ ...body }),
});
const json = await response.json();
if (!response.ok) {
throw new Error(
`Request failed with status code ${response.status}: ${json.error ?? json.message}`,
);
}
if (!json.data) {
throw new Error('Could not parse Traversaal API results. Please try again.');
}
const baseText = json.data?.response_text ?? '';
const sources = json.data?.web_url;
const noResponse = 'No response found in Traversaal API results';
if (!baseText && !sources) {
return noResponse;
}
const sourcesText = sources?.length ? '\n\nSources:\n - ' + sources.join('\n - ') : '';
const result = baseText + sourcesText;
if (!result) {
return noResponse;
}
return result;
} catch (error) {
logger.error('Traversaal API request failed', error);
return `Traversaal API request failed: ${error.message}`;
}
}
}
module.exports = TraversaalSearch;
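A minimal usage sketch (the key comes from the environment here; note that API failures are returned as an error string rather than thrown):

const TraversaalSearch = require('./structured/TraversaalSearch'); // path assumed

async function demoAres() {
  const ares = new TraversaalSearch({ TRAVERSAAL_API_KEY: process.env.TRAVERSAAL_API_KEY });
  // returns the response text, plus a "Sources:" list when web_url entries are present
  return ares._call({ query: 'Latest research on polar ice cap decline' });
}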

View File

@@ -20,6 +20,7 @@ const {
StructuredSD,
StructuredACS,
CodeSherpaTools,
TraversaalSearch,
StructuredWolfram,
TavilySearchResults,
} = require('../');
@@ -165,6 +166,7 @@ const loadTools = async ({
'stable-diffusion': functions ? StructuredSD : StableDiffusionAPI,
'azure-ai-search': functions ? StructuredACS : AzureAISearch,
CodeBrew: CodeBrew,
traversaal_search: TraversaalSearch,
};
const openAIApiKey = await getOpenAIKey(options, user);
@@ -235,9 +237,11 @@ const loadTools = async ({
}
const imageGenOptions = {
req: options.req,
fileStrategy: options.fileStrategy,
processFileURL: options.processFileURL,
returnMetadata: options.returnMetadata,
uploadImageBuffer: options.uploadImageBuffer,
};
const toolOptions = {

View File

@@ -1,5 +1,6 @@
const { getUserPluginAuthValue } = require('~/server/services/PluginService');
const { availableTools } = require('../');
const { logger } = require('~/config');
/**
* Loads a suite of tools with authentication values for a given user, supporting alternate authentication fields.
@@ -30,7 +31,7 @@ const loadToolSuite = async ({ pluginKey, tools, user, options = {} }) => {
return value;
}
} catch (err) {
console.error(`Error fetching plugin auth value for ${field}: ${err.message}`);
logger.error(`Error fetching plugin auth value for ${field}: ${err.message}`);
}
}
return null;
@@ -41,7 +42,7 @@ const loadToolSuite = async ({ pluginKey, tools, user, options = {} }) => {
if (authValue !== null) {
authValues[auth.authField] = authValue;
} else {
console.warn(`No auth value found for ${auth.authField}`);
logger.warn(`[loadToolSuite] No auth value found for ${auth.authField}`);
}
}

View File

@@ -1,5 +1,5 @@
const Keyv = require('keyv');
const { CacheKeys } = require('librechat-data-provider');
const { CacheKeys, ViolationTypes } = require('librechat-data-provider');
const { logFile, violationFile } = require('./keyvFiles');
const { math, isEnabled } = require('~/server/utils');
const keyvRedis = require('./keyvRedis');
@@ -37,7 +37,7 @@ const modelQueries = isEnabled(process.env.USE_REDIS)
const abortKeys = isEnabled(USE_REDIS)
? new Keyv({ store: keyvRedis })
: new Keyv({ namespace: CacheKeys.ABORT_KEYS });
: new Keyv({ namespace: CacheKeys.ABORT_KEYS, ttl: 600000 });
const namespaces = {
[CacheKeys.CONFIG_STORE]: config,
@@ -47,9 +47,12 @@ const namespaces = {
concurrent: createViolationInstance('concurrent'),
non_browser: createViolationInstance('non_browser'),
message_limit: createViolationInstance('message_limit'),
token_balance: createViolationInstance('token_balance'),
token_balance: createViolationInstance(ViolationTypes.TOKEN_BALANCE),
registrations: createViolationInstance('registrations'),
[CacheKeys.FILE_UPLOAD_LIMIT]: createViolationInstance(CacheKeys.FILE_UPLOAD_LIMIT),
[ViolationTypes.FILE_UPLOAD_LIMIT]: createViolationInstance(ViolationTypes.FILE_UPLOAD_LIMIT),
[ViolationTypes.ILLEGAL_MODEL_REQUEST]: createViolationInstance(
ViolationTypes.ILLEGAL_MODEL_REQUEST,
),
logins: createViolationInstance('logins'),
[CacheKeys.ABORT_KEYS]: abortKeys,
[CacheKeys.TOKEN_CONFIG]: tokenConfig,

View File

@@ -1,7 +1,9 @@
const path = require('path');
module.exports = {
root: path.resolve(__dirname, '..', '..'),
uploads: path.resolve(__dirname, '..', '..', 'uploads'),
clientPath: path.resolve(__dirname, '..', '..', 'client'),
dist: path.resolve(__dirname, '..', '..', 'client', 'dist'),
publicPath: path.resolve(__dirname, '..', '..', 'client', 'public'),
imageOutput: path.resolve(__dirname, '..', '..', 'client', 'public', 'images'),

View File

@@ -5,7 +5,15 @@ const { redactFormat, redactMessage, debugTraverse } = require('./parsers');
const logDir = path.join(__dirname, '..', 'logs');
const { NODE_ENV, DEBUG_LOGGING = true, DEBUG_CONSOLE = false } = process.env;
const { NODE_ENV, DEBUG_LOGGING = true, DEBUG_CONSOLE = false, CONSOLE_JSON = false } = process.env;
const useConsoleJson =
(typeof CONSOLE_JSON === 'string' && CONSOLE_JSON?.toLowerCase() === 'true') ||
CONSOLE_JSON === true;
const useDebugConsole =
(typeof DEBUG_CONSOLE === 'string' && DEBUG_CONSOLE?.toLowerCase() === 'true') ||
DEBUG_CONSOLE === true;
const levels = {
error: 0,
@@ -33,7 +41,7 @@ const level = () => {
const fileFormat = winston.format.combine(
redactFormat(),
winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
winston.format.timestamp({ format: () => new Date().toISOString() }),
winston.format.errors({ stack: true }),
winston.format.splat(),
// redactErrors(),
@@ -99,14 +107,20 @@ const consoleFormat = winston.format.combine(
}),
);
if (
(typeof DEBUG_CONSOLE === 'string' && DEBUG_CONSOLE?.toLowerCase() === 'true') ||
DEBUG_CONSOLE === true
) {
if (useDebugConsole) {
transports.push(
new winston.transports.Console({
level: 'debug',
format: winston.format.combine(consoleFormat, debugTraverse),
format: useConsoleJson
? winston.format.combine(fileFormat, debugTraverse, winston.format.json())
: winston.format.combine(fileFormat, debugTraverse),
}),
);
} else if (useConsoleJson) {
transports.push(
new winston.transports.Console({
level: 'info',
format: winston.format.combine(fileFormat, winston.format.json()),
}),
);
} else {
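Taken together, the console transport now resolves as follows (a summary of the branches above):

// DEBUG_CONSOLE=true,  CONSOLE_JSON=true  -> console at 'debug': fileFormat + debugTraverse, JSON output
// DEBUG_CONSOLE=true,  CONSOLE_JSON=false -> console at 'debug': fileFormat + debugTraverse, text output
// DEBUG_CONSOLE=false, CONSOLE_JSON=true  -> console at 'info':  fileFormat, JSON output
// neither enabled                         -> the pre-existing default console transport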

View File

@@ -69,7 +69,7 @@ const updateFileUsage = async (data) => {
const { file_id, inc = 1 } = data;
const updateOperation = {
$inc: { usage: inc },
$unset: { expiresAt: '' },
$unset: { expiresAt: '', temp_file_id: '' },
};
return await File.findOneAndUpdate({ file_id }, updateOperation, { new: true }).lean();
};

View File

@@ -2,6 +2,7 @@ const mongoose = require('mongoose');
const { isEnabled } = require('../server/utils/handleText');
const transactionSchema = require('./schema/transaction');
const { getMultiplier } = require('./tx');
const { logger } = require('~/config');
const Balance = require('./Balance');
const cancelRate = 1.15;
@@ -36,11 +37,37 @@ transactionSchema.statics.create = async function (transactionData) {
}
// Adjust the user's balance
return await Balance.findOneAndUpdate(
const updatedBalance = await Balance.findOneAndUpdate(
{ user: transaction.user },
{ $inc: { tokenCredits: transaction.tokenValue } },
{ upsert: true, new: true },
).lean();
return {
rate: transaction.rate,
user: transaction.user.toString(),
balance: updatedBalance.tokenCredits,
[transaction.tokenType]: transaction.tokenValue,
};
};
module.exports = mongoose.model('Transaction', transactionSchema);
const Transaction = mongoose.model('Transaction', transactionSchema);
/**
* Queries and retrieves transactions based on a given filter.
* @async
* @function getTransactions
* @param {Object} filter - MongoDB filter object to apply when querying transactions.
* @returns {Promise<Array>} A promise that resolves to an array of matched transactions.
* @throws {Error} Throws an error if querying the database fails.
*/
async function getTransactions(filter) {
try {
return await Transaction.find(filter).lean();
} catch (error) {
logger.error('Error querying transactions:', error);
throw error;
}
}
module.exports = { Transaction, getTransactions };
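A usage sketch for the new helper (any valid MongoDB filter over the transaction schema should work; the `user` field is the one referenced elsewhere in this diff):

const { getTransactions } = require('./Transaction'); // path assumed

async function demoListTransactions(userId) {
  return getTransactions({ user: userId });
}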

View File

@@ -1,5 +1,6 @@
const { ViolationTypes } = require('librechat-data-provider');
const { logViolation } = require('~/cache');
const Balance = require('./Balance');
const { logViolation } = require('../cache');
/**
* Checks the balance for a user and determines if they can spend a certain amount.
* If the user cannot spend the amount, it logs a violation and denies the request.
@@ -25,7 +26,7 @@ const checkBalance = async ({ req, res, txData }) => {
return true;
}
const type = 'token_balance';
const type = ViolationTypes.TOKEN_BALANCE;
const errorMessage = {
type,
balance,

View File

@@ -22,14 +22,12 @@ const Key = require('./Key');
const User = require('./User');
const Session = require('./Session');
const Balance = require('./Balance');
const Transaction = require('./Transaction');
module.exports = {
User,
Key,
Session,
Balance,
Transaction,
hashPassword,
updateUser,

View File

@@ -45,7 +45,6 @@ const actionSchema = new Schema({
auth: AuthSchema,
domain: {
type: String,
unique: true,
required: true,
},
// json_schema: Schema.Types.Mixed,

View File

@@ -9,7 +9,6 @@ const assistantSchema = mongoose.Schema(
},
assistant_id: {
type: String,
unique: true,
index: true,
required: true,
},

View File

@@ -70,10 +70,14 @@ const conversationPreset = {
type: String,
},
file_ids: { type: [{ type: String }], default: undefined },
// vision
// deprecated
resendImages: {
type: Boolean,
},
// files
resendFiles: {
type: Boolean,
},
imageDetail: {
type: String,
},

View File

@@ -15,6 +15,9 @@ const mongoose = require('mongoose');
* @property {'file'} object - Type of object, always 'file'
* @property {string} type - Type of file
* @property {number} usage - Number of uses of the file
* @property {string} [context] - Context of the file origin
* @property {boolean} [embedded] - Whether or not the file is embedded in vector db
* @property {string} [model] - The model to identify the group region of the file (for Azure OpenAI hosting)
* @property {string} [source] - The source of the file
* @property {number} [width] - Optional width of the file
* @property {number} [height] - Optional height of the file
@@ -61,6 +64,9 @@ const fileSchema = mongoose.Schema(
required: true,
default: 'file',
},
embedded: {
type: Boolean,
},
type: {
type: String,
required: true,
@@ -78,6 +84,9 @@ const fileSchema = mongoose.Schema(
type: String,
default: FileSources.local,
},
model: {
type: String,
},
width: Number,
height: Number,
expiresAt: {

View File

@@ -1,4 +1,4 @@
const Transaction = require('./Transaction');
const { Transaction } = require('./Transaction');
const { logger } = require('~/config');
/**
@@ -21,10 +21,15 @@ const { logger } = require('~/config');
*/
const spendTokens = async (txData, tokenUsage) => {
const { promptTokens, completionTokens } = tokenUsage;
logger.debug(`[spendTokens] conversationId: ${txData.conversationId} | Token usage: `, {
promptTokens,
completionTokens,
});
logger.debug(
`[spendTokens] conversationId: ${txData.conversationId}${
txData?.context ? ` | Context: ${txData?.context}` : ''
} | Token usage: `,
{
promptTokens,
completionTokens,
},
);
let prompt, completion;
try {
if (promptTokens >= 0) {
@@ -49,8 +54,12 @@ const spendTokens = async (txData, tokenUsage) => {
prompt &&
completion &&
logger.debug('[spendTokens] Transaction data record against balance:', {
prompt,
completion,
user: prompt.user,
prompt: prompt.prompt,
promptRate: prompt.rate,
completion: completion.completion,
completionRate: completion.rate,
balance: completion.balance,
});
} catch (err) {
logger.error('[spendTokens]', err);
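A sketch of a typical call, matching the fields referenced in the logging above (the `model` and `context` values are assumptions; extra txData fields pass through to Transaction.create):

const spendTokens = require('./spendTokens'); // path assumed

async function demoSpend(user, conversationId) {
  await spendTokens(
    { user, conversationId, model: 'gpt-3.5-turbo', context: 'message' },
    { promptTokens: 512, completionTokens: 128 },
  );
}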

View File

@@ -13,6 +13,12 @@ const tokenValues = {
'gpt-3.5-turbo-1106': { prompt: 1, completion: 2 },
'gpt-4-1106': { prompt: 10, completion: 30 },
'gpt-3.5-turbo-0125': { prompt: 0.5, completion: 1.5 },
'claude-3-opus': { prompt: 15, completion: 75 },
'claude-3-sonnet': { prompt: 3, completion: 15 },
'claude-3-haiku': { prompt: 0.25, completion: 1.25 },
'claude-2.1': { prompt: 8, completion: 24 },
'claude-2': { prompt: 8, completion: 24 },
'claude-': { prompt: 0.8, completion: 2.4 },
};
/**
@@ -46,6 +52,8 @@ const getValueKey = (model, endpoint) => {
return '32k';
} else if (modelName.includes('gpt-4')) {
return '8k';
} else if (tokenValues[modelName]) {
return modelName;
}
return undefined;
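The new branch means any model name that is itself a key in `tokenValues` now resolves directly; for example (assuming `modelName` has already been normalized by matching logic not shown here, and that no earlier branch matches):

// getValueKey('claude-3-haiku') => 'claude-3-haiku' (exact tokenValues key)
// getValueKey('gpt-4-0613')     => '8k'             (falls through the existing gpt-4 branch)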

View File

@@ -1,13 +1,19 @@
{
"name": "@librechat/backend",
"version": "0.6.10",
"version": "0.7.0",
"description": "",
"scripts": {
"start": "echo 'please run this from the root directory'",
"server-dev": "echo 'please run this from the root directory'",
"test": "cross-env NODE_ENV=test jest",
"b:test": "NODE_ENV=test bun jest",
"test:ci": "jest --ci"
"test:ci": "jest --ci",
"add-balance": "node ./add-balance.js",
"list-balances": "node ./list-balances.js",
"user-stats": "node ./user-stats.js",
"create-user": "node ./create-user.js",
"ban-user": "node ./ban-user.js",
"delete-user": "node ./delete-user.js"
},
"repository": {
"type": "git",
@@ -25,9 +31,9 @@
"bugs": {
"url": "https://github.com/danny-avila/LibreChat/issues"
},
"homepage": "https://github.com/danny-avila/LibreChat#readme",
"homepage": "https://librechat.ai",
"dependencies": {
"@anthropic-ai/sdk": "^0.5.4",
"@anthropic-ai/sdk": "^0.16.1",
"@azure/search-documents": "^12.0.0",
"@keyv/mongo": "^2.1.8",
"@keyv/redis": "^2.8.1",
@@ -46,7 +52,7 @@
"express-rate-limit": "^6.9.0",
"express-session": "^1.17.3",
"file-type": "^18.7.0",
"firebase": "^10.6.0",
"firebase": "^10.8.0",
"googleapis": "^126.0.1",
"handlebars": "^4.7.7",
"html": "^1.0.0",
@@ -59,14 +65,14 @@
"langchain": "^0.0.214",
"librechat-data-provider": "*",
"lodash": "^4.17.21",
"meilisearch": "^0.33.0",
"meilisearch": "^0.38.0",
"mime": "^3.0.0",
"module-alias": "^2.2.3",
"mongoose": "^7.1.1",
"multer": "^1.4.5-lts.1",
"nodejs-gpt": "^1.37.4",
"nodemailer": "^6.9.4",
"openai": "^4.20.1",
"openai": "^4.29.0",
"openai-chat-tokens": "^0.2.8",
"openid-client": "^5.4.2",
"passport": "^0.6.0",

View File

@@ -1,7 +1,8 @@
const throttle = require('lodash/throttle');
const { getResponseSender, Constants } = require('librechat-data-provider');
const { sendMessage, createOnProgress } = require('~/server/utils');
const { saveMessage, getConvoTitle, getConvo } = require('~/models');
const { createAbortController, handleAbortError } = require('~/server/middleware');
const { sendMessage, createOnProgress } = require('~/server/utils');
const { saveMessage, getConvo } = require('~/models');
const { logger } = require('~/config');
const AskController = async (req, res, next, initializeClient, addTitle) => {
@@ -16,13 +17,10 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
logger.debug('[AskController]', { text, conversationId, ...endpointOption });
let metadata;
let userMessage;
let promptTokens;
let userMessageId;
let responseMessageId;
let lastSavedTimestamp = 0;
let saveDelay = 100;
const sender = getResponseSender({
...endpointOption,
model: endpointOption.modelOptions.model,
@@ -31,8 +29,6 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
const newConvo = !conversationId;
const user = req.user.id;
const addMetadata = (data) => (metadata = data);
const getReqData = (data = {}) => {
for (let key in data) {
if (key === 'userMessage') {
@@ -54,11 +50,8 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
const { client } = await initializeClient({ req, res, endpointOption });
const { onProgress: progressCallback, getPartialText } = createOnProgress({
onProgress: ({ text: partialText }) => {
const currentTimestamp = Date.now();
if (currentTimestamp - lastSavedTimestamp > saveDelay) {
lastSavedTimestamp = currentTimestamp;
onProgress: throttle(
({ text: partialText }) => {
saveMessage({
messageId: responseMessageId,
sender,
@@ -70,12 +63,10 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
error: false,
user,
});
}
if (saveDelay < 500) {
saveDelay = 500;
}
},
},
3000,
{ trailing: false },
),
});
getText = getPartialText;
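This replaces the hand-rolled `lastSavedTimestamp`/`saveDelay` bookkeeping with lodash's `throttle`: at most one partial save per 3 seconds, and `{ trailing: false }` drops the queued trailing call, presumably because the final message is persisted separately once the stream completes. A standalone sketch:

const throttle = require('lodash/throttle');

const savePartial = throttle(
  (partialText) => {
    // stand-in for the saveMessage call above
    console.log('saving partial response:', partialText.length);
  },
  3000,
  { trailing: false }, // leading-edge only: no extra save fires after the last chunk
);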
@@ -92,6 +83,20 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
const { abortController, onStart } = createAbortController(req, res, getAbortData);
res.on('close', () => {
logger.debug('[AskController] Request closed');
if (!abortController) {
return;
} else if (abortController.signal.aborted) {
return;
} else if (abortController.requestCompleted) {
return;
}
abortController.abort();
logger.debug('[AskController] Request aborted on close');
});
const messageOptions = {
user,
parentMessageId,
@@ -99,7 +104,6 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
overrideParentMessageId,
getReqData,
onStart,
addMetadata,
abortController,
onProgress: progressCallback.call(null, {
res,
@@ -114,22 +118,23 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
response.parentMessageId = overrideParentMessageId;
}
if (metadata) {
response = { ...response, ...metadata };
}
response.endpoint = endpointOption.endpoint;
const conversation = await getConvo(user, conversationId);
conversation.title =
conversation && !conversation.title ? null : conversation?.title || 'New Chat';
if (client.options.attachments) {
userMessage.files = client.options.attachments;
conversation.model = endpointOption.modelOptions.model;
delete userMessage.image_urls;
}
if (!abortController.signal.aborted) {
sendMessage(res, {
title: await getConvoTitle(user, conversationId),
final: true,
conversation: await getConvo(user, conversationId),
conversation,
title: conversation.title,
requestMessage: userMessage,
responseMessage: response,
});

View File

@@ -1,7 +1,8 @@
const throttle = require('lodash/throttle');
const { getResponseSender } = require('librechat-data-provider');
const { sendMessage, createOnProgress } = require('~/server/utils');
const { saveMessage, getConvoTitle, getConvo } = require('~/models');
const { createAbortController, handleAbortError } = require('~/server/middleware');
const { sendMessage, createOnProgress } = require('~/server/utils');
const { saveMessage, getConvo } = require('~/models');
const { logger } = require('~/config');
const EditController = async (req, res, next, initializeClient) => {
@@ -25,11 +26,8 @@ const EditController = async (req, res, next, initializeClient) => {
...endpointOption,
});
let metadata;
let userMessage;
let promptTokens;
let lastSavedTimestamp = 0;
let saveDelay = 100;
const sender = getResponseSender({
...endpointOption,
model: endpointOption.modelOptions.model,
@@ -38,7 +36,6 @@ const EditController = async (req, res, next, initializeClient) => {
const userMessageId = parentMessageId;
const user = req.user.id;
const addMetadata = (data) => (metadata = data);
const getReqData = (data = {}) => {
for (let key in data) {
if (key === 'userMessage') {
@@ -53,11 +50,8 @@ const EditController = async (req, res, next, initializeClient) => {
const { onProgress: progressCallback, getPartialText } = createOnProgress({
generation,
onProgress: ({ text: partialText }) => {
const currentTimestamp = Date.now();
if (currentTimestamp - lastSavedTimestamp > saveDelay) {
lastSavedTimestamp = currentTimestamp;
onProgress: throttle(
({ text: partialText }) => {
saveMessage({
messageId: responseMessageId,
sender,
@@ -70,12 +64,10 @@ const EditController = async (req, res, next, initializeClient) => {
error: false,
user,
});
}
if (saveDelay < 500) {
saveDelay = 500;
}
},
},
3000,
{ trailing: false },
),
});
const getAbortData = () => ({
@@ -90,6 +82,20 @@ const EditController = async (req, res, next, initializeClient) => {
const { abortController, onStart } = createAbortController(req, res, getAbortData);
res.on('close', () => {
logger.debug('[EditController] Request closed');
if (!abortController) {
return;
} else if (abortController.signal.aborted) {
return;
} else if (abortController.requestCompleted) {
return;
}
abortController.abort();
logger.debug('[EditController] Request aborted on close');
});
try {
const { client } = await initializeClient({ req, res, endpointOption });
@@ -104,7 +110,6 @@ const EditController = async (req, res, next, initializeClient) => {
overrideParentMessageId,
getReqData,
onStart,
addMetadata,
abortController,
onProgress: progressCallback.call(null, {
res,
@@ -113,15 +118,19 @@ const EditController = async (req, res, next, initializeClient) => {
}),
});
if (metadata) {
response = { ...response, ...metadata };
const conversation = await getConvo(user, conversationId);
conversation.title =
conversation && !conversation.title ? null : conversation?.title || 'New Chat';
if (client.options.attachments) {
conversation.model = endpointOption.modelOptions.model;
}
if (!abortController.signal.aborted) {
sendMessage(res, {
title: await getConvoTitle(user, conversationId),
final: true,
conversation: await getConvo(user, conversationId),
conversation,
title: conversation.title,
requestMessage: userMessage,
responseMessage: response,
});

View File

@@ -1,4 +1,4 @@
const { CacheKeys, EModelEndpoint } = require('librechat-data-provider');
const { CacheKeys, EModelEndpoint, orderEndpointsConfig } = require('librechat-data-provider');
const { loadDefaultEndpointsConfig, loadConfigEndpoints } = require('~/server/services/Config');
const { getLogStores } = require('~/cache');
@@ -10,15 +10,24 @@ async function endpointController(req, res) {
return;
}
const defaultEndpointsConfig = await loadDefaultEndpointsConfig();
const customConfigEndpoints = await loadConfigEndpoints();
const defaultEndpointsConfig = await loadDefaultEndpointsConfig(req);
const customConfigEndpoints = await loadConfigEndpoints(req);
const endpointsConfig = { ...defaultEndpointsConfig, ...customConfigEndpoints };
if (endpointsConfig[EModelEndpoint.assistants] && req.app.locals?.[EModelEndpoint.assistants]) {
endpointsConfig[EModelEndpoint.assistants].disableBuilder =
req.app.locals[EModelEndpoint.assistants].disableBuilder;
/** @type {TEndpointsConfig} */
const mergedConfig = { ...defaultEndpointsConfig, ...customConfigEndpoints };
if (mergedConfig[EModelEndpoint.assistants] && req.app.locals?.[EModelEndpoint.assistants]) {
const { disableBuilder, retrievalModels, capabilities, ..._rest } =
req.app.locals[EModelEndpoint.assistants];
mergedConfig[EModelEndpoint.assistants] = {
...mergedConfig[EModelEndpoint.assistants],
retrievalModels,
disableBuilder,
capabilities,
};
}
const endpointsConfig = orderEndpointsConfig(mergedConfig);
await cache.set(CacheKeys.ENDPOINT_CONFIG, endpointsConfig);
res.send(JSON.stringify(endpointsConfig));
}

View File

@@ -2,12 +2,26 @@ const { CacheKeys } = require('librechat-data-provider');
 const { loadDefaultModels, loadConfigModels } = require('~/server/services/Config');
 const { getLogStores } = require('~/cache');

-async function modelController(req, res) {
+const getModelsConfig = async (req) => {
+  const cache = getLogStores(CacheKeys.CONFIG_STORE);
+  let modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
+  if (!modelsConfig) {
+    modelsConfig = await loadModels(req);
+  }
+  return modelsConfig;
+};
+
+/**
+ * Loads the models from the config.
+ * @param {Express.Request} req - The Express request object.
+ * @returns {Promise<TModelsConfig>} The models config.
+ */
+async function loadModels(req) {
   const cache = getLogStores(CacheKeys.CONFIG_STORE);
   const cachedModelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
   if (cachedModelsConfig) {
-    res.send(cachedModelsConfig);
-    return;
+    return cachedModelsConfig;
   }
   const defaultModelsConfig = await loadDefaultModels(req);
   const customModelsConfig = await loadConfigModels(req);
@@ -15,7 +29,12 @@ async function modelController(req, res) {
   const modelConfig = { ...defaultModelsConfig, ...customModelsConfig };

   await cache.set(CacheKeys.MODELS_CONFIG, modelConfig);
-  res.send(modelConfig);
+  return modelConfig;
 }

+async function modelController(req, res) {
+  const modelConfig = await loadModels(req);
+  res.send(modelConfig);
+}
+
-module.exports = modelController;
+module.exports = { modelController, loadModels, getModelsConfig };

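The controller is now split into getModelsConfig (cache-aware read) and loadModels (fetch and populate), so middleware can reuse the same config without triggering a second fetch. A minimal sketch of that cache-or-load pattern in isolation, with a plain Map standing in for the getLogStores cache:

const cache = new Map();

// Return the cached config if present; otherwise load it once and cache it.
async function loadModels(fetchModels) {
  if (cache.has('models')) {
    return cache.get('models');
  }
  const models = await fetchModels();
  cache.set('models', models);
  return models;
}

// Usage: the second call never reaches the fetcher.
(async () => {
  await loadModels(async () => ({ openAI: ['gpt-4'] }));
  const models = await loadModels(async () => {
    throw new Error('unreachable: served from cache');
  });
  console.log(models.openAI); // ['gpt-4']
})();
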
View File

@@ -2,6 +2,7 @@ require('dotenv').config();
 const path = require('path');
 require('module-alias')({ base: path.resolve(__dirname, '..') });
 const cors = require('cors');
+const axios = require('axios');
 const express = require('express');
 const passport = require('passport');
 const mongoSanitize = require('express-mongo-sanitize');
@@ -22,6 +23,9 @@ const port = Number(PORT) || 3080;
 const host = HOST || 'localhost';

 const startServer = async () => {
+  if (typeof Bun !== 'undefined') {
+    axios.defaults.headers.common['Accept-Encoding'] = 'gzip';
+  }
   await connectDb();
   logger.info('Connected to MongoDB');
   await indexSync();

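The new startup check forces axios to advertise gzip-only encoding when the server runs under Bun; presumably this sidesteps response-decoding differences between Bun and Node at the time. Since Bun is detected through a global that only Bun defines, the typeof guard never throws under Node:

// Sketch: typeof is safe on undeclared identifiers, so this works in both runtimes.
const isBun = typeof Bun !== 'undefined';
console.log(isBun ? 'running under Bun' : 'running under Node');
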
View File

@@ -110,7 +110,7 @@ const handleAbortError = async (res, req, error, data) => {
   }

   const respondWithError = async (partialText) => {
-    const options = {
+    let options = {
       sender,
       messageId,
       conversationId,
@@ -121,7 +121,8 @@ const handleAbortError = async (res, req, error, data) => {
     };

     if (partialText) {
-      options.overrideProps = {
+      options = {
+        ...options,
         error: false,
         unfinished: true,
         text: partialText,
View File

@@ -1,16 +1,22 @@
 const { CacheKeys, RunStatus, isUUID } = require('librechat-data-provider');
-const { initializeClient } = require('~/server/services/Endpoints/assistant');
+const { initializeClient } = require('~/server/services/Endpoints/assistants');
 const { checkMessageGaps, recordUsage } = require('~/server/services/Threads');
 const { getConvo } = require('~/models/Conversation');
 const getLogStores = require('~/cache/getLogStores');
 const { sendMessage } = require('~/server/utils');
 // const spendTokens = require('~/models/spendTokens');
 const { logger } = require('~/config');
+
+const three_minutes = 1000 * 60 * 3;

 async function abortRun(req, res) {
   res.setHeader('Content-Type', 'application/json');
   const { abortKey } = req.body;
   const [conversationId, latestMessageId] = abortKey.split(':');
+  const conversation = await getConvo(req.user.id, conversationId);
+
+  if (conversation?.model) {
+    req.body.model = conversation.model;
+  }

   if (!isUUID.safeParse(conversationId).success) {
     logger.error('[abortRun] Invalid conversationId', { conversationId });
@@ -35,9 +41,9 @@ async function abortRun(req, res) {
   const { openai } = await initializeClient({ req, res });

   try {
-    await cache.set(cacheKey, 'cancelled');
+    await cache.set(cacheKey, 'cancelled', three_minutes);
     const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
-    logger.debug('Cancelled run:', cancelledRun);
+    logger.debug('[abortRun] Cancelled run:', cancelledRun);
   } catch (error) {
     logger.error('[abortRun] Error cancelling run', error);
     if (
@@ -71,7 +77,7 @@ async function abortRun(req, res) {
   const finalEvent = {
     title: 'New Chat',
     final: true,
-    conversation: await getConvo(req.user.id, conversationId),
+    conversation,
     runMessages,
   };

View File

@@ -1,11 +1,12 @@
 const { parseConvo, EModelEndpoint } = require('librechat-data-provider');
-const { processFiles } = require('~/server/services/Files/process');
+const { getModelsConfig } = require('~/server/controllers/ModelController');
+const assistants = require('~/server/services/Endpoints/assistants');
 const gptPlugins = require('~/server/services/Endpoints/gptPlugins');
+const { processFiles } = require('~/server/services/Files/process');
 const anthropic = require('~/server/services/Endpoints/anthropic');
 const openAI = require('~/server/services/Endpoints/openAI');
 const custom = require('~/server/services/Endpoints/custom');
 const google = require('~/server/services/Endpoints/google');
-const assistant = require('~/server/services/Endpoints/assistant');

 const buildFunction = {
   [EModelEndpoint.openAI]: openAI.buildOptions,
@@ -14,10 +15,10 @@ const buildFunction = {
   [EModelEndpoint.azureOpenAI]: openAI.buildOptions,
   [EModelEndpoint.anthropic]: anthropic.buildOptions,
   [EModelEndpoint.gptPlugins]: gptPlugins.buildOptions,
-  [EModelEndpoint.assistants]: assistant.buildOptions,
+  [EModelEndpoint.assistants]: assistants.buildOptions,
 };

-function buildEndpointOption(req, res, next) {
+async function buildEndpointOption(req, res, next) {
   const { endpoint, endpointType } = req.body;
   const parsedBody = parseConvo({ endpoint, endpointType, conversation: req.body });
   req.body.endpointOption = buildFunction[endpointType ?? endpoint](
@@ -25,6 +26,10 @@ function buildEndpointOption(req, res, next) {
     parsedBody,
     endpointType,
   );
+
+  const modelsConfig = await getModelsConfig(req);
+  req.body.endpointOption.modelsConfig = modelsConfig;
+
   if (req.body.files) {
     // hold the promise
     req.body.endpointOption.attachments = processFiles(req.body.files);

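Note that processFiles is deliberately not awaited here: the pending promise is stored on endpointOption so file processing overlaps with the rest of the request, and the client awaits it only when attachments are actually needed. A small sketch of that hold-the-promise pattern, with all names hypothetical:

// Start slow work early without awaiting; resolve it where the result is needed.
async function processFilesStub(files = []) {
  return files.map((file) => ({ ...file, processed: true }));
}

function middleware(req) {
  req.attachments = processFilesStub(req.files); // holds a promise, not a value
}

async function handler(req) {
  // ...other setup runs while files are still being processed...
  const attachments = await req.attachments;
  console.log(attachments.length); // 1
}

const req = { files: [{ id: 1 }] };
middleware(req);
handler(req);
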
View File

@@ -3,6 +3,7 @@ const checkBan = require('./checkBan');
 const uaParser = require('./uaParser');
 const setHeaders = require('./setHeaders');
 const loginLimiter = require('./loginLimiter');
+const validateModel = require('./validateModel');
 const requireJwtAuth = require('./requireJwtAuth');
 const uploadLimiters = require('./uploadLimiters');
 const registerLimiter = require('./registerLimiter');
@@ -32,6 +33,7 @@ module.exports = {
   validateMessageReq,
   buildEndpointOption,
   validateRegistration,
+  validateModel,
   moderateText,
   noIndex,
 };

View File

@@ -1,5 +1,6 @@
 const axios = require('axios');
 const denyRequest = require('./denyRequest');
+const { logger } = require('~/config');

 async function moderateText(req, res, next) {
   if (process.env.OPENAI_MODERATION === 'true') {
@@ -28,7 +29,7 @@ async function moderateText(req, res, next) {
       return await denyRequest(req, res, errorMessage);
     }
   } catch (error) {
-    console.error('Error in moderateText:', error);
+    logger.error('Error in moderateText:', error);
     const errorMessage = 'error in moderation check';
     return await denyRequest(req, res, errorMessage);
   }

View File

@@ -1,5 +1,5 @@
 const rateLimit = require('express-rate-limit');
-const { CacheKeys } = require('librechat-data-provider');
+const { ViolationTypes } = require('librechat-data-provider');
 const logViolation = require('~/cache/logViolation');

 const getEnvironmentVariables = () => {
@@ -35,7 +35,7 @@ const createFileUploadHandler = (ip = true) => {
   } = getEnvironmentVariables();
   return async (req, res) => {
-    const type = CacheKeys.FILE_UPLOAD_LIMIT;
+    const type = ViolationTypes.FILE_UPLOAD_LIMIT;
     const errorMessage = {
       type,
       max: ip ? fileUploadIpMax : fileUploadUserMax,

View File

@@ -0,0 +1,47 @@
+const { ViolationTypes } = require('librechat-data-provider');
+const { getModelsConfig } = require('~/server/controllers/ModelController');
+const { handleError } = require('~/server/utils');
+const { logViolation } = require('~/cache');
+
+/**
+ * Validates the model of the request.
+ *
+ * @async
+ * @param {Express.Request} req - The Express request object.
+ * @param {Express.Response} res - The Express response object.
+ * @param {Function} next - The Express next function.
+ */
+const validateModel = async (req, res, next) => {
+  const { model, endpoint } = req.body;
+  if (!model) {
+    return handleError(res, { text: 'Model not provided' });
+  }
+
+  const modelsConfig = await getModelsConfig(req);
+
+  if (!modelsConfig) {
+    return handleError(res, { text: 'Models not loaded' });
+  }
+
+  const availableModels = modelsConfig[endpoint];
+  if (!availableModels) {
+    return handleError(res, { text: 'Endpoint models not loaded' });
+  }
+
+  let validModel = !!availableModels.find((availableModel) => availableModel === model);
+
+  if (validModel) {
+    return next();
+  }
+
+  const { ILLEGAL_MODEL_REQ_SCORE: score = 5 } = process.env ?? {};
+  const type = ViolationTypes.ILLEGAL_MODEL_REQUEST;
+  const errorMessage = {
+    type,
+  };
+
+  await logViolation(req, res, type, errorMessage, score);
+  return handleError(res, { text: 'Illegal model request' });
+};
+
+module.exports = validateModel;

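The middleware rejects any request whose model is absent from the loaded config for that endpoint, and records it as a scored violation (default 5, overridable via ILLEGAL_MODEL_REQ_SCORE). An illustrative check of the two branches, with a faked models config rather than the real getModelsConfig:

// Faked config: in the real middleware this comes from getModelsConfig(req).
const modelsConfig = { openAI: ['gpt-3.5-turbo', 'gpt-4'] };

function isValidModel(endpoint, model) {
  const availableModels = modelsConfig[endpoint] ?? [];
  return availableModels.some((availableModel) => availableModel === model);
}

console.log(isValidModel('openAI', 'gpt-4')); // true: request proceeds to next()
console.log(isValidModel('openAI', 'not-a-model')); // false: logViolation + error response
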
View File

@@ -1,9 +1,10 @@
 const express = require('express');
 const AskController = require('~/server/controllers/AskController');
-const { initializeClient } = require('~/server/services/Endpoints/anthropic');
+const { addTitle, initializeClient } = require('~/server/services/Endpoints/anthropic');
 const {
   setHeaders,
   handleAbort,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
 } = require('~/server/middleware');
@@ -12,8 +13,15 @@ const router = express.Router();

 router.post('/abort', handleAbort());

-router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res, next) => {
-  await AskController(req, res, next, initializeClient);
-});
+router.post(
+  '/',
+  validateEndpoint,
+  validateModel,
+  buildEndpointOption,
+  setHeaders,
+  async (req, res, next) => {
+    await AskController(req, res, next, initializeClient, addTitle);
+  },
+);

 module.exports = router;

View File

@@ -5,6 +5,7 @@ const { addTitle } = require('~/server/services/Endpoints/openAI');
 const {
   handleAbort,
   setHeaders,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
 } = require('~/server/middleware');
@@ -13,8 +14,15 @@ const router = express.Router();

 router.post('/abort', handleAbort());

-router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res, next) => {
-  await AskController(req, res, next, initializeClient, addTitle);
-});
+router.post(
+  '/',
+  validateEndpoint,
+  validateModel,
+  buildEndpointOption,
+  setHeaders,
+  async (req, res, next) => {
+    await AskController(req, res, next, initializeClient, addTitle);
+  },
+);

 module.exports = router;

View File

@@ -4,6 +4,7 @@ const { initializeClient } = require('~/server/services/Endpoints/google');
 const {
   setHeaders,
   handleAbort,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
 } = require('~/server/middleware');
@@ -12,8 +13,15 @@ const router = express.Router();

 router.post('/abort', handleAbort());

-router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res, next) => {
-  await AskController(req, res, next, initializeClient);
-});
+router.post(
+  '/',
+  validateEndpoint,
+  validateModel,
+  buildEndpointOption,
+  setHeaders,
+  async (req, res, next) => {
+    await AskController(req, res, next, initializeClient);
+  },
+);

 module.exports = router;

View File

@@ -1,81 +1,88 @@
 const express = require('express');
-const router = express.Router();
+const throttle = require('lodash/throttle');
 const { getResponseSender, Constants } = require('librechat-data-provider');
-const { validateTools } = require('~/app');
-const { addTitle } = require('~/server/services/Endpoints/openAI');
 const { initializeClient } = require('~/server/services/Endpoints/gptPlugins');
 const { saveMessage, getConvoTitle, getConvo } = require('~/models');
 const { sendMessage, createOnProgress } = require('~/server/utils');
+const { addTitle } = require('~/server/services/Endpoints/openAI');
 const {
   handleAbort,
   createAbortController,
   handleAbortError,
   setHeaders,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
   moderateText,
 } = require('~/server/middleware');
+const { validateTools } = require('~/app');
 const { logger } = require('~/config');

+const router = express.Router();
+
 router.use(moderateText);
 router.post('/abort', handleAbort());
router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res) => {
let {
text,
endpointOption,
conversationId,
parentMessageId = null,
overrideParentMessageId = null,
} = req.body;
logger.debug('[/ask/gptPlugins]', { text, conversationId, ...endpointOption });
let metadata;
let userMessage;
let promptTokens;
let userMessageId;
let responseMessageId;
let lastSavedTimestamp = 0;
let saveDelay = 100;
const sender = getResponseSender({ ...endpointOption, model: endpointOption.modelOptions.model });
const newConvo = !conversationId;
const user = req.user.id;
router.post(
'/',
validateEndpoint,
validateModel,
buildEndpointOption,
setHeaders,
async (req, res) => {
let {
text,
endpointOption,
conversationId,
parentMessageId = null,
overrideParentMessageId = null,
} = req.body;
const plugins = [];
logger.debug('[/ask/gptPlugins]', { text, conversationId, ...endpointOption });
const addMetadata = (data) => (metadata = data);
const getReqData = (data = {}) => {
for (let key in data) {
if (key === 'userMessage') {
userMessage = data[key];
userMessageId = data[key].messageId;
} else if (key === 'responseMessageId') {
responseMessageId = data[key];
} else if (key === 'promptTokens') {
promptTokens = data[key];
} else if (!conversationId && key === 'conversationId') {
conversationId = data[key];
let userMessage;
let promptTokens;
let userMessageId;
let responseMessageId;
const sender = getResponseSender({
...endpointOption,
model: endpointOption.modelOptions.model,
});
const newConvo = !conversationId;
const user = req.user.id;
const plugins = [];
const getReqData = (data = {}) => {
for (let key in data) {
if (key === 'userMessage') {
userMessage = data[key];
userMessageId = data[key].messageId;
} else if (key === 'responseMessageId') {
responseMessageId = data[key];
} else if (key === 'promptTokens') {
promptTokens = data[key];
} else if (!conversationId && key === 'conversationId') {
conversationId = data[key];
}
}
}
};
};
let streaming = null;
let timer = null;
const throttledSaveMessage = throttle(saveMessage, 3000, { trailing: false });
let streaming = null;
let timer = null;
const {
onProgress: progressCallback,
sendIntermediateMessage,
getPartialText,
} = createOnProgress({
onProgress: ({ text: partialText }) => {
const currentTimestamp = Date.now();
const {
onProgress: progressCallback,
sendIntermediateMessage,
getPartialText,
} = createOnProgress({
onProgress: ({ text: partialText }) => {
if (timer) {
clearTimeout(timer);
}
if (timer) {
clearTimeout(timer);
}
if (currentTimestamp - lastSavedTimestamp > saveDelay) {
lastSavedTimestamp = currentTimestamp;
saveMessage({
throttledSaveMessage({
messageId: responseMessageId,
sender,
conversationId,
@@ -87,140 +94,131 @@ router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req,
plugins,
user,
});
}
if (saveDelay < 500) {
saveDelay = 500;
}
streaming = new Promise((resolve) => {
timer = setTimeout(() => {
resolve();
}, 250);
});
},
});
streaming = new Promise((resolve) => {
timer = setTimeout(() => {
resolve();
}, 250);
});
},
});
const pluginMap = new Map();
const onAgentAction = async (action, runId) => {
pluginMap.set(runId, action.tool);
sendIntermediateMessage(res, { plugins });
};
const onToolStart = async (tool, input, runId, parentRunId) => {
const pluginName = pluginMap.get(parentRunId);
const latestPlugin = {
runId,
loading: true,
inputs: [input],
latest: pluginName,
outputs: null,
const pluginMap = new Map();
const onAgentAction = async (action, runId) => {
pluginMap.set(runId, action.tool);
sendIntermediateMessage(res, { plugins });
};
if (streaming) {
await streaming;
}
const extraTokens = ':::plugin:::\n';
plugins.push(latestPlugin);
sendIntermediateMessage(res, { plugins }, extraTokens);
};
const onToolStart = async (tool, input, runId, parentRunId) => {
const pluginName = pluginMap.get(parentRunId);
const latestPlugin = {
runId,
loading: true,
inputs: [input],
latest: pluginName,
outputs: null,
};
const onToolEnd = async (output, runId) => {
if (streaming) {
await streaming;
}
if (streaming) {
await streaming;
}
const extraTokens = ':::plugin:::\n';
plugins.push(latestPlugin);
sendIntermediateMessage(res, { plugins }, extraTokens);
};
const pluginIndex = plugins.findIndex((plugin) => plugin.runId === runId);
const onToolEnd = async (output, runId) => {
if (streaming) {
await streaming;
}
if (pluginIndex !== -1) {
plugins[pluginIndex].loading = false;
plugins[pluginIndex].outputs = output;
}
};
const pluginIndex = plugins.findIndex((plugin) => plugin.runId === runId);
const onChainEnd = () => {
saveMessage({ ...userMessage, user });
sendIntermediateMessage(res, { plugins });
};
if (pluginIndex !== -1) {
plugins[pluginIndex].loading = false;
plugins[pluginIndex].outputs = output;
}
};
const getAbortData = () => ({
sender,
conversationId,
messageId: responseMessageId,
parentMessageId: overrideParentMessageId ?? userMessageId,
text: getPartialText(),
plugins: plugins.map((p) => ({ ...p, loading: false })),
userMessage,
promptTokens,
});
const { abortController, onStart } = createAbortController(req, res, getAbortData);
const onChainEnd = () => {
saveMessage({ ...userMessage, user });
sendIntermediateMessage(res, { plugins });
};
try {
endpointOption.tools = await validateTools(user, endpointOption.tools);
const { client } = await initializeClient({ req, res, endpointOption });
let response = await client.sendMessage(text, {
user,
const getAbortData = () => ({
sender,
conversationId,
parentMessageId,
overrideParentMessageId,
getReqData,
onAgentAction,
onChainEnd,
onToolStart,
onToolEnd,
onStart,
addMetadata,
getPartialText,
...endpointOption,
onProgress: progressCallback.call(null, {
res,
text,
parentMessageId: overrideParentMessageId || userMessageId,
plugins,
}),
abortController,
messageId: responseMessageId,
parentMessageId: overrideParentMessageId ?? userMessageId,
text: getPartialText(),
plugins: plugins.map((p) => ({ ...p, loading: false })),
userMessage,
promptTokens,
});
const { abortController, onStart } = createAbortController(req, res, getAbortData);
if (overrideParentMessageId) {
response.parentMessageId = overrideParentMessageId;
}
try {
endpointOption.tools = await validateTools(user, endpointOption.tools);
const { client } = await initializeClient({ req, res, endpointOption });
if (metadata) {
response = { ...response, ...metadata };
}
let response = await client.sendMessage(text, {
user,
conversationId,
parentMessageId,
overrideParentMessageId,
getReqData,
onAgentAction,
onChainEnd,
onToolStart,
onToolEnd,
onStart,
getPartialText,
...endpointOption,
onProgress: progressCallback.call(null, {
res,
text,
parentMessageId: overrideParentMessageId || userMessageId,
plugins,
}),
abortController,
});
logger.debug('[/ask/gptPlugins]', response);
if (overrideParentMessageId) {
response.parentMessageId = overrideParentMessageId;
}
response.plugins = plugins.map((p) => ({ ...p, loading: false }));
await saveMessage({ ...response, user });
logger.debug('[/ask/gptPlugins]', response);
sendMessage(res, {
title: await getConvoTitle(user, conversationId),
final: true,
conversation: await getConvo(user, conversationId),
requestMessage: userMessage,
responseMessage: response,
});
res.end();
response.plugins = plugins.map((p) => ({ ...p, loading: false }));
await saveMessage({ ...response, user });
if (parentMessageId === Constants.NO_PARENT && newConvo) {
addTitle(req, {
text,
response,
client,
sendMessage(res, {
title: await getConvoTitle(user, conversationId),
final: true,
conversation: await getConvo(user, conversationId),
requestMessage: userMessage,
responseMessage: response,
});
res.end();
if (parentMessageId === Constants.NO_PARENT && newConvo) {
addTitle(req, {
text,
response,
client,
});
}
} catch (error) {
const partialText = getPartialText();
handleAbortError(res, req, error, {
partialText,
conversationId,
sender,
messageId: responseMessageId,
parentMessageId: userMessageId ?? parentMessageId,
});
}
} catch (error) {
const partialText = getPartialText();
handleAbortError(res, req, error, {
partialText,
conversationId,
sender,
messageId: responseMessageId,
parentMessageId: userMessageId ?? parentMessageId,
});
}
});
},
);
module.exports = router;

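Besides wiring in validateModel, the main mechanical change in this route is replacing the hand-rolled lastSavedTimestamp/saveDelay bookkeeping with lodash's throttle: throttledSaveMessage fires at most once per three seconds, and { trailing: false } drops the deferred call so no stale partial save runs after the stream ends. A minimal sketch of that behavior:

const throttle = require('lodash/throttle');

// Leading call runs immediately; calls inside the 3s window are dropped,
// and trailing: false means nothing fires after the burst either.
const save = throttle((text) => console.log('saved:', text), 3000, { trailing: false });

save('partial 1'); // logs 'saved: partial 1'
save('partial 2'); // dropped
save('partial 3'); // dropped; no deferred save at the 3s mark
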
View File

@@ -4,6 +4,7 @@ const { addTitle, initializeClient } = require('~/server/services/Endpoints/open
 const {
   handleAbort,
   setHeaders,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
   moderateText,
@@ -13,8 +14,15 @@ const router = express.Router();
 router.use(moderateText);
 router.post('/abort', handleAbort());

-router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res, next) => {
-  await AskController(req, res, next, initializeClient, addTitle);
-});
+router.post(
+  '/',
+  validateEndpoint,
+  validateModel,
+  buildEndpointOption,
+  setHeaders,
+  async (req, res, next) => {
+    await AskController(req, res, next, initializeClient, addTitle);
+  },
+);

 module.exports = router;

View File

@@ -1,10 +1,10 @@
 const { v4 } = require('uuid');
 const express = require('express');
 const { actionDelimiter } = require('librechat-data-provider');
-const { initializeClient } = require('~/server/services/Endpoints/assistant');
+const { initializeClient } = require('~/server/services/Endpoints/assistants');
+const { encryptMetadata, domainParser } = require('~/server/services/ActionService');
 const { updateAction, getActions, deleteAction } = require('~/models/Action');
 const { updateAssistant, getAssistant } = require('~/models/Assistant');
-const { encryptMetadata } = require('~/server/services/ActionService');
 const { logger } = require('~/config');

 const router = express.Router();
@@ -17,7 +17,7 @@ const router = express.Router();
  */
 router.get('/', async (req, res) => {
   try {
-    res.json(await getActions({ user: req.user.id }));
+    res.json(await getActions());
   } catch (error) {
     res.status(500).json({ error: error.message });
   }
@@ -44,7 +44,10 @@ router.post('/:assistant_id', async (req, res) => {
     let metadata = encryptMetadata(_metadata);

-    const { domain } = metadata;
+    let { domain } = metadata;
+    /* Azure doesn't support periods in function names */
+    domain = domainParser(req, domain, true);
+
     if (!domain) {
       return res.status(400).json({ message: 'No domain provided' });
     }
@@ -55,9 +58,9 @@ router.post('/:assistant_id', async (req, res) => {
     /** @type {{ openai: OpenAI }} */
     const { openai } = await initializeClient({ req, res });

-    initialPromises.push(getAssistant({ assistant_id, user: req.user.id }));
+    initialPromises.push(getAssistant({ assistant_id }));
     initialPromises.push(openai.beta.assistants.retrieve(assistant_id));
-    !!_action_id && initialPromises.push(getActions({ user: req.user.id, action_id }, true));
+    !!_action_id && initialPromises.push(getActions({ action_id }, true));

     /** @type {[AssistantDocument, Assistant, [Action|undefined]]} */
     const [assistant_data, assistant, actions_result] = await Promise.all(initialPromises);
@@ -74,14 +77,7 @@ router.post('/:assistant_id', async (req, res) => {
     const { actions: _actions = [] } = assistant_data ?? {};
     const actions = [];
     for (const action of _actions) {
-      const [action_domain, current_action_id] = action.split(actionDelimiter);
-      if (action_domain === domain && !_action_id) {
-        // TODO: dupe check on the frontend
-        return res.status(400).json({
-          message: `Action sets cannot have duplicate domains - ${domain} already exists on another action`,
-        });
-      }
-
+      const [_action_domain, current_action_id] = action.split(actionDelimiter);
       if (current_action_id === action_id) {
         continue;
       }
@@ -115,14 +111,15 @@ router.post('/:assistant_id', async (req, res) => {
     const promises = [];
     promises.push(
       updateAssistant(
-        { assistant_id, user: req.user.id },
+        { assistant_id },
         {
           actions,
+          user: req.user.id,
         },
       ),
     );
     promises.push(openai.beta.assistants.update(assistant_id, { tools }));
-    promises.push(updateAction({ action_id, user: req.user.id }, { metadata, assistant_id }));
+    promises.push(updateAction({ action_id }, { metadata, assistant_id, user: req.user.id }));

     /** @type {[AssistantDocument, Assistant, Action]} */
     const resolved = await Promise.all(promises);
@@ -147,21 +144,22 @@ router.post('/:assistant_id', async (req, res) => {
  * @param {string} req.params.action_id - The ID of the action to delete.
  * @returns {Object} 200 - success response - application/json
  */
-router.delete('/:assistant_id/:action_id', async (req, res) => {
+router.delete('/:assistant_id/:action_id/:model', async (req, res) => {
   try {
-    const { assistant_id, action_id } = req.params;
+    const { assistant_id, action_id, model } = req.params;
+    req.body.model = model;

     /** @type {{ openai: OpenAI }} */
     const { openai } = await initializeClient({ req, res });

     const initialPromises = [];
-    initialPromises.push(getAssistant({ assistant_id, user: req.user.id }));
+    initialPromises.push(getAssistant({ assistant_id }));
     initialPromises.push(openai.beta.assistants.retrieve(assistant_id));

     /** @type {[AssistantDocument, Assistant]} */
     const [assistant_data, assistant] = await Promise.all(initialPromises);

-    const { actions } = assistant_data ?? {};
+    const { actions = [] } = assistant_data ?? {};
     const { tools = [] } = assistant ?? {};

     let domain = '';
@@ -173,6 +171,8 @@ router.delete('/:assistant_id/:action_id', async (req, res) => {
       return true;
     });

+    domain = domainParser(req, domain, true);
+
     const updatedTools = tools.filter(
       (tool) => !(tool.function && tool.function.name.includes(domain)),
     );
@@ -180,14 +180,15 @@ router.delete('/:assistant_id/:action_id', async (req, res) => {
     const promises = [];
     promises.push(
       updateAssistant(
-        { assistant_id, user: req.user.id },
+        { assistant_id },
         {
           actions: updatedActions,
+          user: req.user.id,
         },
       ),
     );
     promises.push(openai.beta.assistants.update(assistant_id, { tools: updatedTools }));
-    promises.push(deleteAction({ action_id, user: req.user.id }));
+    promises.push(deleteAction({ action_id }));

     await Promise.all(promises);
     res.status(200).json({ message: 'Action deleted successfully' });

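The new domainParser(req, domain, true) calls address the restriction noted in the diff comment: Azure OpenAI rejects periods in function names, so an action's domain has to be encoded before it is embedded in a tool name and decoded when read back. The real implementation lives in ActionService; a hypothetical round-trip sketch of the idea:

// Hypothetical encoding (the real domainParser may differ): make the domain
// period-free but reversible before it becomes part of a function name.
const SEP = '_dot_';

const encodeDomain = (domain) => domain.split('.').join(SEP);
const decodeDomain = (encoded) => encoded.split(SEP).join('.');

const encoded = encodeDomain('api.example.com'); // 'api_dot_example_dot_com'
console.log(decodeDomain(encoded) === 'api.example.com'); // true
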
View File

@@ -1,10 +1,14 @@
 const multer = require('multer');
 const express = require('express');
 const { FileContext, EModelEndpoint } = require('librechat-data-provider');
-const { updateAssistant, getAssistants } = require('~/models/Assistant');
-const { initializeClient } = require('~/server/services/Endpoints/assistant');
+const {
+  initializeClient,
+  listAssistantsForAzure,
+  listAssistants,
+} = require('~/server/services/Endpoints/assistants');
 const { getStrategyFunctions } = require('~/server/services/Files/strategies');
 const { uploadImageBuffer } = require('~/server/services/Files/process');
+const { updateAssistant, getAssistants } = require('~/models/Assistant');
 const { deleteFileByFilter } = require('~/models/File');
 const { logger } = require('~/config');
 const actions = require('./actions');
@@ -48,6 +52,10 @@ router.post('/', async (req, res) => {
       })
       .filter((tool) => tool);

+    if (openai.locals?.azureOptions) {
+      assistantData.model = openai.locals.azureOptions.azureOpenAIApiDeploymentName;
+    }
+
     const assistant = await openai.beta.assistants.create(assistantData);
     logger.debug('/assistants/', assistant);
     res.status(201).json(assistant);
@@ -101,6 +109,10 @@ router.patch('/:id', async (req, res) => {
       })
       .filter((tool) => tool);

+    if (openai.locals?.azureOptions && updateData.model) {
+      updateData.model = openai.locals.azureOptions.azureOpenAIApiDeploymentName;
+    }
+
     const updatedAssistant = await openai.beta.assistants.update(assistant_id, updateData);
     res.json(updatedAssistant);
   } catch (error) {
@@ -137,19 +149,18 @@ router.delete('/:id', async (req, res) => {
  */
 router.get('/', async (req, res) => {
   try {
-    /** @type {{ openai: OpenAI }} */
-    const { openai } = await initializeClient({ req, res });
-
-    const { limit, order, after, before } = req.query;
-    const response = await openai.beta.assistants.list({
-      limit,
-      order,
-      after,
-      before,
-    });
+    const { limit = 100, order = 'desc', after, before } = req.query;
+    const query = { limit, order, after, before };
+
+    const azureConfig = req.app.locals[EModelEndpoint.azureOpenAI];
     /** @type {AssistantListResponse} */
-    let body = response.body;
+    let body;
+
+    if (azureConfig?.assistants) {
+      body = await listAssistantsForAzure({ req, res, azureConfig, query });
+    } else {
+      ({ body } = await listAssistants({ req, res, query }));
+    }

     if (req.app.locals?.[EModelEndpoint.assistants]) {
       /** @type {Partial<TAssistantEndpoint>} */
@@ -165,7 +176,7 @@ router.get('/', async (req, res) => {
     res.json(body);
   } catch (error) {
     logger.error('[/assistants] Error listing assistants', error);
-    res.status(500).json({ error: error.message });
+    res.status(500).json({ message: 'Error listing assistants' });
   }
 });
@@ -230,12 +241,13 @@ router.post('/avatar/:assistant_id', upload.single('file'), async (req, res) =>
     const promises = [];
     promises.push(
       updateAssistant(
-        { assistant_id, user: req.user.id },
+        { assistant_id },
         {
           avatar: {
             filepath: image.filepath,
             source: req.app.locals.fileStrategy,
           },
+          user: req.user.id,
         },
       ),
     );

View File

@@ -1,6 +1,16 @@
 const { v4 } = require('uuid');
 const express = require('express');
-const { EModelEndpoint, Constants, RunStatus, CacheKeys } = require('librechat-data-provider');
+const {
+  Constants,
+  RunStatus,
+  CacheKeys,
+  FileSources,
+  ContentTypes,
+  EModelEndpoint,
+  ViolationTypes,
+  ImageVisionTool,
+  AssistantStreamEvents,
+} = require('librechat-data-provider');
 const {
   initThread,
   recordUsage,
@@ -9,18 +19,23 @@ const {
   addThreadMetadata,
   saveAssistantMessage,
 } = require('~/server/services/Threads');
+const { sendResponse, sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
 const { runAssistant, createOnTextProgress } = require('~/server/services/AssistantService');
-const { addTitle, initializeClient } = require('~/server/services/Endpoints/assistant');
-const { createRun, sleep } = require('~/server/services/Runs');
+const { addTitle, initializeClient } = require('~/server/services/Endpoints/assistants');
+const { formatMessage, createVisionPrompt } = require('~/app/clients/prompts');
+const { createRun, StreamRunManager } = require('~/server/services/Runs');
+const { getTransactions } = require('~/models/Transaction');
+const checkBalance = require('~/models/checkBalance');
 const { getConvo } = require('~/models/Conversation');
 const getLogStores = require('~/cache/getLogStores');
-const { sendMessage } = require('~/server/utils');
+const { getModelMaxTokens } = require('~/utils');
 const { logger } = require('~/config');

 const router = express.Router();

 const {
   setHeaders,
   handleAbort,
+  validateModel,
   handleAbortError,
   // validateEndpoint,
   buildEndpointOption,
@@ -28,6 +43,8 @@ const {
 router.post('/abort', handleAbort());

+const ten_minutes = 1000 * 60 * 10;
+
 /**
  * @route POST /
  * @desc Chat with an assistant
@@ -36,8 +53,9 @@ router.post('/abort', handleAbort());
  * @param {express.Response} res - The response object, used to send back a response.
  * @returns {void}
  */
-router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
+router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res) => {
   logger.debug('[/assistants/chat/] req.body', req.body);
+
   const {
     text,
     model,
@@ -85,6 +103,16 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
   let parentMessageId = _parentId;
   /** @type {TMessage[]} */
   let previousMessages = [];
+  /** @type {import('librechat-data-provider').TConversation | null} */
+  let conversation = null;
+  /** @type {string[]} */
+  let file_ids = [];
+  /** @type {Set<string>} */
+  let attachedFileIds = new Set();
+  /** @type {TMessage | null} */
+  let requestMessage = null;
+  /** @type {undefined | Promise<ChatCompletion>} */
+  let visionPromise;

   const userMessageId = v4();
   const responseMessageId = v4();
@@ -95,15 +123,195 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
const cache = getLogStores(CacheKeys.ABORT_KEYS);
const cacheKey = `${req.user.id}:${conversationId}`;
/** @type {Run | undefined} - The completed run, undefined if incomplete */
let completedRun;
const handleError = async (error) => {
const defaultErrorMessage =
'The Assistant run failed to initialize. Try sending a message in a new conversation.';
const messageData = {
thread_id,
assistant_id,
conversationId,
parentMessageId,
sender: 'System',
user: req.user.id,
shouldSaveMessage: false,
messageId: responseMessageId,
endpoint: EModelEndpoint.assistants,
};
if (error.message === 'Run cancelled') {
return res.end();
} else if (error.message === 'Request closed' && completedRun) {
return;
} else if (error.message === 'Request closed') {
logger.debug('[/assistants/chat/] Request aborted on close');
} else if (/Files.*are invalid/.test(error.message)) {
const errorMessage = `Files are invalid, or may not have uploaded yet.${
req.app.locals?.[EModelEndpoint.azureOpenAI].assistants
? ' If using Azure OpenAI, files are only available in the region of the assistant\'s model at the time of upload.'
: ''
}`;
return sendResponse(res, messageData, errorMessage);
} else if (error?.message?.includes('string too long')) {
return sendResponse(
res,
messageData,
'Message too long. The Assistants API has a limit of 32,768 characters per message. Please shorten it and try again.',
);
} else if (error?.message?.includes(ViolationTypes.TOKEN_BALANCE)) {
return sendResponse(res, messageData, error.message);
} else {
logger.error('[/assistants/chat/]', error);
}
if (!openai || !thread_id || !run_id) {
return sendResponse(res, messageData, defaultErrorMessage);
}
await sleep(2000);
try {
const status = await cache.get(cacheKey);
if (status === 'cancelled') {
logger.debug('[/assistants/chat/] Run already cancelled');
return res.end();
}
await cache.delete(cacheKey);
const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
logger.debug('[/assistants/chat/] Cancelled run:', cancelledRun);
} catch (error) {
logger.error('[/assistants/chat/] Error cancelling run', error);
}
await sleep(2000);
let run;
try {
run = await openai.beta.threads.runs.retrieve(thread_id, run_id);
await recordUsage({
...run.usage,
model: run.model,
user: req.user.id,
conversationId,
});
} catch (error) {
logger.error('[/assistants/chat/] Error fetching or processing run', error);
}
let finalEvent;
try {
const runMessages = await checkMessageGaps({
openai,
run_id,
thread_id,
conversationId,
latestMessageId: responseMessageId,
});
const errorContentPart = {
text: {
value:
error?.message ?? 'There was an error processing your request. Please try again later.',
},
type: ContentTypes.ERROR,
};
if (!Array.isArray(runMessages[runMessages.length - 1]?.content)) {
runMessages[runMessages.length - 1].content = [errorContentPart];
} else {
const contentParts = runMessages[runMessages.length - 1].content;
for (let i = 0; i < contentParts.length; i++) {
const currentPart = contentParts[i];
/** @type {CodeToolCall | RetrievalToolCall | FunctionToolCall | undefined} */
const toolCall = currentPart?.[ContentTypes.TOOL_CALL];
if (
toolCall &&
toolCall?.function &&
!(toolCall?.function?.output || toolCall?.function?.output?.length)
) {
contentParts[i] = {
...currentPart,
[ContentTypes.TOOL_CALL]: {
...toolCall,
function: {
...toolCall.function,
output: 'error processing tool',
},
},
};
}
}
runMessages[runMessages.length - 1].content.push(errorContentPart);
}
finalEvent = {
title: 'New Chat',
final: true,
conversation: await getConvo(req.user.id, conversationId),
runMessages,
};
} catch (error) {
logger.error('[/assistants/chat/] Error finalizing error process', error);
return sendResponse(res, messageData, 'The Assistant run failed');
}
return sendResponse(res, finalEvent);
};
try {
res.on('close', async () => {
if (!completedRun) {
await handleError(new Error('Request closed'));
}
});
if (convoId && !_thread_id) {
completedRun = true;
throw new Error('Missing thread_id for existing conversation');
}
if (!assistant_id) {
completedRun = true;
throw new Error('Missing assistant_id');
}
const checkBalanceBeforeRun = async () => {
if (!isEnabled(process.env.CHECK_BALANCE)) {
return;
}
const transactions =
(await getTransactions({
user: req.user.id,
context: 'message',
conversationId,
})) ?? [];
const totalPreviousTokens = Math.abs(
transactions.reduce((acc, curr) => acc + curr.rawAmount, 0),
);
// TODO: make promptBuffer a config option; buffer for titles, needs buffer for system instructions
const promptBuffer = parentMessageId === Constants.NO_PARENT && !_thread_id ? 200 : 0;
// 5 is added for labels
let promptTokens = (await countTokens(text + (promptPrefix ?? ''))) + 5;
promptTokens += totalPreviousTokens + promptBuffer;
// Count tokens up to the current context window
promptTokens = Math.min(promptTokens, getModelMaxTokens(model));
await checkBalance({
req,
res,
txData: {
model,
user: req.user.id,
tokenType: 'prompt',
amount: promptTokens,
},
});
};
/** @type {{ openai: OpenAIClient }} */
const { openai: _openai, client } = await initializeClient({
req,
@@ -114,15 +322,11 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
openai = _openai;
// if (thread_id) {
// previousMessages = await checkMessageGaps({ openai, thread_id, conversationId });
// }
if (previousMessages.length) {
parentMessageId = previousMessages[previousMessages.length - 1].messageId;
}
const userMessage = {
let userMessage = {
role: 'user',
content: text,
metadata: {
@@ -130,75 +334,7 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
},
};
let thread_file_ids = [];
if (convoId) {
const convo = await getConvo(req.user.id, convoId);
if (convo && convo.file_ids) {
thread_file_ids = convo.file_ids;
}
}
const file_ids = files.map(({ file_id }) => file_id);
if (file_ids.length || thread_file_ids.length) {
userMessage.file_ids = file_ids;
openai.attachedFileIds = new Set([...file_ids, ...thread_file_ids]);
}
// TODO: may allow multiple messages to be created beforehand in a future update
const initThreadBody = {
messages: [userMessage],
metadata: {
user: req.user.id,
conversationId,
},
};
const result = await initThread({ openai, body: initThreadBody, thread_id });
thread_id = result.thread_id;
createOnTextProgress({
openai,
conversationId,
userMessageId,
messageId: responseMessageId,
thread_id,
});
const requestMessage = {
user: req.user.id,
text,
messageId: userMessageId,
parentMessageId,
// TODO: make sure client sends correct format for `files`, use zod
files,
file_ids,
conversationId,
isCreatedByUser: true,
assistant_id,
thread_id,
model: assistant_id,
};
previousMessages.push(requestMessage);
await saveUserMessage({ ...requestMessage, model });
const conversation = {
conversationId,
// TODO: title feature
title: 'New Chat',
endpoint: EModelEndpoint.assistants,
promptPrefix: promptPrefix,
instructions: instructions,
assistant_id,
// model,
};
if (file_ids.length) {
conversation.file_ids = file_ids;
}
/** @type {CreateRunBody} */
/** @type {CreateRunBody | undefined} */
const body = {
assistant_id,
model,
@@ -212,51 +348,256 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
body.instructions = instructions;
}
/* NOTE:
* By default, a Run will use the model and tools configuration specified in Assistant object,
* but you can override most of these when creating the Run for added flexibility:
*/
const run = await createRun({
openai,
thread_id,
body,
});
const getRequestFileIds = async () => {
let thread_file_ids = [];
if (convoId) {
const convo = await getConvo(req.user.id, convoId);
if (convo && convo.file_ids) {
thread_file_ids = convo.file_ids;
}
}
run_id = run.id;
await cache.set(cacheKey, `${thread_id}:${run_id}`);
file_ids = files.map(({ file_id }) => file_id);
if (file_ids.length || thread_file_ids.length) {
userMessage.file_ids = file_ids;
attachedFileIds = new Set([...file_ids, ...thread_file_ids]);
}
};
sendMessage(res, {
sync: true,
conversationId,
// messages: previousMessages,
requestMessage,
responseMessage: {
user: req.user.id,
messageId: openai.responseMessage.messageId,
parentMessageId: userMessageId,
const addVisionPrompt = async () => {
if (!req.body.endpointOption.attachments) {
return;
}
/** @type {MongoFile[]} */
const attachments = await req.body.endpointOption.attachments;
if (
attachments &&
attachments.every((attachment) => attachment.source === FileSources.openai)
) {
return;
}
const assistant = await openai.beta.assistants.retrieve(assistant_id);
const visionToolIndex = assistant.tools.findIndex(
(tool) => tool?.function && tool?.function?.name === ImageVisionTool.function.name,
);
if (visionToolIndex === -1) {
return;
}
let visionMessage = {
role: 'user',
content: '',
};
const files = await client.addImageURLs(visionMessage, attachments);
if (!visionMessage.image_urls?.length) {
return;
}
const imageCount = visionMessage.image_urls.length;
const plural = imageCount > 1;
visionMessage.content = createVisionPrompt(plural);
visionMessage = formatMessage({ message: visionMessage, endpoint: EModelEndpoint.openAI });
visionPromise = openai.chat.completions.create({
model: 'gpt-4-vision-preview',
messages: [visionMessage],
max_tokens: 4000,
});
const pluralized = plural ? 's' : '';
body.additional_instructions = `${
body.additional_instructions ? `${body.additional_instructions}\n` : ''
}The user has uploaded ${imageCount} image${pluralized}.
Use the \`${ImageVisionTool.function.name}\` tool to retrieve ${
plural ? '' : 'a '
}detailed text description${pluralized} for ${plural ? 'each' : 'the'} image${pluralized}.`;
return files;
};
const initializeThread = async () => {
/** @type {[ undefined | MongoFile[]]}*/
const [processedFiles] = await Promise.all([addVisionPrompt(), getRequestFileIds()]);
// TODO: may allow multiple messages to be created beforehand in a future update
const initThreadBody = {
messages: [userMessage],
metadata: {
user: req.user.id,
conversationId,
},
};
if (processedFiles) {
for (const file of processedFiles) {
if (file.source !== FileSources.openai) {
attachedFileIds.delete(file.file_id);
const index = file_ids.indexOf(file.file_id);
if (index > -1) {
file_ids.splice(index, 1);
}
}
}
userMessage.file_ids = file_ids;
}
const result = await initThread({ openai, body: initThreadBody, thread_id });
thread_id = result.thread_id;
createOnTextProgress({
openai,
conversationId,
userMessageId,
messageId: responseMessageId,
thread_id,
});
requestMessage = {
user: req.user.id,
text,
messageId: userMessageId,
parentMessageId,
// TODO: make sure client sends correct format for `files`, use zod
files,
file_ids,
conversationId,
isCreatedByUser: true,
assistant_id,
thread_id,
model: assistant_id,
},
};
previousMessages.push(requestMessage);
/* asynchronous */
saveUserMessage({ ...requestMessage, model });
conversation = {
conversationId,
title: 'New Chat',
endpoint: EModelEndpoint.assistants,
promptPrefix: promptPrefix,
instructions: instructions,
assistant_id,
// model,
};
if (file_ids.length) {
conversation.file_ids = file_ids;
}
};
const promises = [initializeThread(), checkBalanceBeforeRun()];
await Promise.all(promises);
const sendInitialResponse = () => {
sendMessage(res, {
sync: true,
conversationId,
// messages: previousMessages,
requestMessage,
responseMessage: {
user: req.user.id,
messageId: openai.responseMessage.messageId,
parentMessageId: userMessageId,
conversationId,
assistant_id,
thread_id,
model: assistant_id,
},
});
};
/** @type {RunResponse | typeof StreamRunManager | undefined} */
let response;
const processRun = async (retry = false) => {
if (req.app.locals[EModelEndpoint.azureOpenAI]?.assistants) {
openai.attachedFileIds = attachedFileIds;
openai.visionPromise = visionPromise;
if (retry) {
response = await runAssistant({
openai,
thread_id,
run_id,
in_progress: openai.in_progress,
});
return;
}
/* NOTE:
* By default, a Run will use the model and tools configuration specified in Assistant object,
* but you can override most of these when creating the Run for added flexibility:
*/
const run = await createRun({
openai,
thread_id,
body,
});
run_id = run.id;
await cache.set(cacheKey, `${thread_id}:${run_id}`, ten_minutes);
sendInitialResponse();
// todo: retry logic
response = await runAssistant({ openai, thread_id, run_id });
return;
}
/** @type {{[AssistantStreamEvents.ThreadRunCreated]: (event: ThreadRunCreated) => Promise<void>}} */
const handlers = {
[AssistantStreamEvents.ThreadRunCreated]: async (event) => {
await cache.set(cacheKey, `${thread_id}:${event.data.id}`, ten_minutes);
run_id = event.data.id;
sendInitialResponse();
},
};
const streamRunManager = new StreamRunManager({
req,
res,
openai,
handlers,
thread_id,
visionPromise,
attachedFileIds,
responseMessage: openai.responseMessage,
// streamOptions: {
// },
});
await streamRunManager.runAssistant({
thread_id,
body,
});
response = streamRunManager;
};
await processRun();
logger.debug('[/assistants/chat/] response', {
run: response.run,
steps: response.steps,
});
// todo: retry logic
let response = await runAssistant({ openai, thread_id, run_id });
logger.debug('[/assistants/chat/] response', response);
if (response.run.status === RunStatus.CANCELLED) {
logger.debug('[/assistants/chat/] Run cancelled, handled by `abortRun`');
return res.end();
}
if (response.run.status === RunStatus.IN_PROGRESS) {
response = await runAssistant({
openai,
thread_id,
run_id,
in_progress: openai.in_progress,
});
processRun(true);
}
completedRun = response.run;
/** @type {ResponseMessage} */
const responseMessage = {
...openai.responseMessage,
...(response.responseMessage ?? response.finalMessage),
parentMessageId: userMessageId,
conversationId,
user: req.user.id,
@@ -265,9 +606,6 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
model: assistant_id,
};
// TODO: token count from usage returned in run
// TODO: parse responses, save to db, send to user
sendMessage(res, {
title: 'New Chat',
final: true,
@@ -284,7 +622,7 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
if (parentMessageId === Constants.NO_PARENT && !_thread_id) {
addTitle(req, {
text,
responseText: openai.responseText,
responseText: response.text,
conversationId,
client,
});
@@ -299,7 +637,7 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
if (!response.run.usage) {
await sleep(3000);
const completedRun = await openai.beta.threads.runs.retrieve(thread_id, run.id);
completedRun = await openai.beta.threads.runs.retrieve(thread_id, response.run.id);
if (completedRun.usage) {
await recordUsage({
...completedRun.usage,
@@ -317,62 +655,7 @@ router.post('/', buildEndpointOption, setHeaders, async (req, res) => {
});
}
} catch (error) {
if (error.message === 'Run cancelled') {
return res.end();
}
logger.error('[/assistants/chat/]', error);
if (!openai || !thread_id || !run_id) {
return res.status(500).json({ error: 'The Assistant run failed to initialize' });
}
try {
await cache.delete(cacheKey);
const cancelledRun = await openai.beta.threads.runs.cancel(thread_id, run_id);
logger.debug('Cancelled run:', cancelledRun);
} catch (error) {
logger.error('[abortRun] Error cancelling run', error);
}
await sleep(2000);
try {
const run = await openai.beta.threads.runs.retrieve(thread_id, run_id);
await recordUsage({
...run.usage,
model: run.model,
user: req.user.id,
conversationId,
});
} catch (error) {
logger.error('[/assistants/chat/] Error fetching or processing run', error);
}
try {
const runMessages = await checkMessageGaps({
openai,
run_id,
thread_id,
conversationId,
latestMessageId: responseMessageId,
});
const finalEvent = {
title: 'New Chat',
final: true,
conversation: await getConvo(req.user.id, conversationId),
runMessages,
};
if (res.headersSent && finalEvent) {
return sendMessage(res, finalEvent);
}
res.json(finalEvent);
} catch (error) {
logger.error('[/assistants/chat/] Error finalizing error process', error);
return res.status(500).json({ error: 'The Assistant run failed' });
}
await handleError(error);
}
});

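The deepest change in this route is the split between the legacy polling path (createRun plus runAssistant, still used for Azure assistants) and the new streaming path driven by StreamRunManager, which takes a map of handlers keyed by stream event name; here ThreadRunCreated is used to cache the abort key as soon as the run id exists. A minimal sketch of that handler-map dispatch pattern, independent of the OpenAI SDK:

// Dispatch each stream event to a handler looked up by event name; events
// without a registered handler are ignored. Names here are illustrative.
const handlers = {
  'thread.run.created': async (event) => {
    console.log('run id known, cache the abort key:', event.data.id);
  },
};

async function dispatch(event) {
  const handler = handlers[event.event];
  if (handler) {
    await handler(event);
  }
}

dispatch({ event: 'thread.run.created', data: { id: 'run_123' } });
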
View File

@@ -43,6 +43,8 @@ router.get('/', async function (req, res) {
       isBirthday() ||
       isEnabled(process.env.SHOW_BIRTHDAY_ICON) ||
       process.env.SHOW_BIRTHDAY_ICON === '',
+    helpAndFaqURL: process.env.HELP_AND_FAQ_URL || 'https://librechat.ai',
+    interface: req.app.locals.interface,
   };

   if (typeof process.env.CUSTOM_FOOTER === 'string') {
View File

@@ -1,10 +1,10 @@
 const express = require('express');
 const { CacheKeys } = require('librechat-data-provider');
-const { initializeClient } = require('~/server/services/Endpoints/assistant');
+const { initializeClient } = require('~/server/services/Endpoints/assistants');
 const { getConvosByPage, deleteConvos, getConvo, saveConvo } = require('~/models/Conversation');
 const requireJwtAuth = require('~/server/middleware/requireJwtAuth');
-const { sleep } = require('~/server/services/Runs/handle');
 const getLogStores = require('~/cache/getLogStores');
+const { sleep } = require('~/server/utils');
 const { logger } = require('~/config');

 const router = express.Router();
View File

@@ -4,6 +4,7 @@ const { initializeClient } = require('~/server/services/Endpoints/anthropic');
 const {
   setHeaders,
   handleAbort,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
 } = require('~/server/middleware');
@@ -12,8 +13,15 @@ const router = express.Router();

 router.post('/abort', handleAbort());

-router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res, next) => {
-  await EditController(req, res, next, initializeClient);
-});
+router.post(
+  '/',
+  validateEndpoint,
+  validateModel,
+  buildEndpointOption,
+  setHeaders,
+  async (req, res, next) => {
+    await EditController(req, res, next, initializeClient);
+  },
+);

 module.exports = router;

View File

@@ -5,6 +5,7 @@ const { addTitle } = require('~/server/services/Endpoints/openAI');
 const {
   handleAbort,
   setHeaders,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
 } = require('~/server/middleware');
@@ -13,8 +14,15 @@ const router = express.Router();

 router.post('/abort', handleAbort());

-router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res, next) => {
-  await EditController(req, res, next, initializeClient, addTitle);
-});
+router.post(
+  '/',
+  validateEndpoint,
+  validateModel,
+  buildEndpointOption,
+  setHeaders,
+  async (req, res, next) => {
+    await EditController(req, res, next, initializeClient, addTitle);
+  },
+);

 module.exports = router;

View File

@@ -4,6 +4,7 @@ const { initializeClient } = require('~/server/services/Endpoints/google');
 const {
   setHeaders,
   handleAbort,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
 } = require('~/server/middleware');
@@ -12,8 +13,15 @@ const router = express.Router();

 router.post('/abort', handleAbort());

-router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res, next) => {
-  await EditController(req, res, next, initializeClient);
-});
+router.post(
+  '/',
+  validateEndpoint,
+  validateModel,
+  buildEndpointOption,
+  setHeaders,
+  async (req, res, next) => {
+    await EditController(req, res, next, initializeClient);
+  },
+);

 module.exports = router;

View File

@@ -1,88 +1,94 @@
 const express = require('express');
-const router = express.Router();
-const { validateTools } = require('~/app');
+const throttle = require('lodash/throttle');
 const { getResponseSender } = require('librechat-data-provider');
-const { saveMessage, getConvoTitle, getConvo } = require('~/models');
-const { initializeClient } = require('~/server/services/Endpoints/gptPlugins');
-const { sendMessage, createOnProgress, formatSteps, formatAction } = require('~/server/utils');
 const {
   handleAbort,
   createAbortController,
   handleAbortError,
   setHeaders,
+  validateModel,
   validateEndpoint,
   buildEndpointOption,
   moderateText,
 } = require('~/server/middleware');
+const { sendMessage, createOnProgress, formatSteps, formatAction } = require('~/server/utils');
+const { initializeClient } = require('~/server/services/Endpoints/gptPlugins');
+const { saveMessage, getConvoTitle, getConvo } = require('~/models');
+const { validateTools } = require('~/app');
 const { logger } = require('~/config');

+const router = express.Router();
+
 router.use(moderateText);

 router.post('/abort', handleAbort());
let {
text,
generation,
endpointOption,
conversationId,
responseMessageId,
isContinued = false,
parentMessageId = null,
overrideParentMessageId = null,
} = req.body;
router.post(
'/',
validateEndpoint,
validateModel,
buildEndpointOption,
setHeaders,
async (req, res) => {
let {
text,
generation,
endpointOption,
conversationId,
responseMessageId,
isContinued = false,
parentMessageId = null,
overrideParentMessageId = null,
} = req.body;
logger.debug('[/edit/gptPlugins]', {
text,
generation,
isContinued,
conversationId,
...endpointOption,
});
let metadata;
let userMessage;
let promptTokens;
let lastSavedTimestamp = 0;
let saveDelay = 100;
const sender = getResponseSender({ ...endpointOption, model: endpointOption.modelOptions.model });
const userMessageId = parentMessageId;
const user = req.user.id;
logger.debug('[/edit/gptPlugins]', {
text,
generation,
isContinued,
conversationId,
...endpointOption,
});
const plugin = {
loading: true,
inputs: [],
latest: null,
outputs: null,
};
let userMessage;
let promptTokens;
const sender = getResponseSender({
...endpointOption,
model: endpointOption.modelOptions.model,
});
const userMessageId = parentMessageId;
const user = req.user.id;
const addMetadata = (data) => (metadata = data);
const getReqData = (data = {}) => {
for (let key in data) {
if (key === 'userMessage') {
userMessage = data[key];
} else if (key === 'responseMessageId') {
responseMessageId = data[key];
} else if (key === 'promptTokens') {
promptTokens = data[key];
const plugin = {
loading: true,
inputs: [],
latest: null,
outputs: null,
};
const getReqData = (data = {}) => {
for (let key in data) {
if (key === 'userMessage') {
userMessage = data[key];
} else if (key === 'responseMessageId') {
responseMessageId = data[key];
} else if (key === 'promptTokens') {
promptTokens = data[key];
}
}
}
};
};
const {
onProgress: progressCallback,
sendIntermediateMessage,
getPartialText,
} = createOnProgress({
generation,
onProgress: ({ text: partialText }) => {
const currentTimestamp = Date.now();
const throttledSaveMessage = throttle(saveMessage, 3000, { trailing: false });
const {
onProgress: progressCallback,
sendIntermediateMessage,
getPartialText,
} = createOnProgress({
generation,
onProgress: ({ text: partialText }) => {
if (plugin.loading === true) {
plugin.loading = false;
}
if (plugin.loading === true) {
plugin.loading = false;
}
if (currentTimestamp - lastSavedTimestamp > saveDelay) {
lastSavedTimestamp = currentTimestamp;
saveMessage({
throttledSaveMessage({
messageId: responseMessageId,
sender,
conversationId,
@@ -94,104 +100,95 @@ router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req,
error: false,
user,
});
}
},
});
if (saveDelay < 500) {
saveDelay = 500;
const onAgentAction = (action, start = false) => {
const formattedAction = formatAction(action);
plugin.inputs.push(formattedAction);
plugin.latest = formattedAction.plugin;
if (!start) {
saveMessage({ ...userMessage, user });
}
},
});
sendIntermediateMessage(res, { plugin });
// logger.debug('PLUGIN ACTION', formattedAction);
};
const onAgentAction = (action, start = false) => {
const formattedAction = formatAction(action);
plugin.inputs.push(formattedAction);
plugin.latest = formattedAction.plugin;
if (!start) {
const onChainEnd = (data) => {
let { intermediateSteps: steps } = data;
plugin.outputs = steps && steps[0].action ? formatSteps(steps) : 'An error occurred.';
plugin.loading = false;
saveMessage({ ...userMessage, user });
}
sendIntermediateMessage(res, { plugin });
// logger.debug('PLUGIN ACTION', formattedAction);
};
sendIntermediateMessage(res, { plugin });
// logger.debug('CHAIN END', plugin.outputs);
};
const onChainEnd = (data) => {
let { intermediateSteps: steps } = data;
plugin.outputs = steps && steps[0].action ? formatSteps(steps) : 'An error occurred.';
plugin.loading = false;
saveMessage({ ...userMessage, user });
sendIntermediateMessage(res, { plugin });
// logger.debug('CHAIN END', plugin.outputs);
};
const getAbortData = () => ({
sender,
conversationId,
messageId: responseMessageId,
parentMessageId: overrideParentMessageId ?? userMessageId,
text: getPartialText(),
plugin: { ...plugin, loading: false },
userMessage,
promptTokens,
});
const { abortController, onStart } = createAbortController(req, res, getAbortData);
try {
endpointOption.tools = await validateTools(user, endpointOption.tools);
const { client } = await initializeClient({ req, res, endpointOption });
let response = await client.sendMessage(text, {
user,
generation,
isContinued,
isEdited: true,
conversationId,
parentMessageId,
responseMessageId,
overrideParentMessageId,
getReqData,
onAgentAction,
onChainEnd,
onStart,
addMetadata,
...endpointOption,
onProgress: progressCallback.call(null, {
res,
text,
plugin,
parentMessageId: overrideParentMessageId || userMessageId,
}),
abortController,
});
if (overrideParentMessageId) {
response.parentMessageId = overrideParentMessageId;
}
if (metadata) {
response = { ...response, ...metadata };
}
logger.debug('[/edit/gptPlugins] CLIENT RESPONSE', response);
response.plugin = { ...plugin, loading: false };
await saveMessage({ ...response, user });
sendMessage(res, {
title: await getConvoTitle(user, conversationId),
final: true,
conversation: await getConvo(user, conversationId),
requestMessage: userMessage,
responseMessage: response,
});
res.end();
} catch (error) {
const partialText = getPartialText();
handleAbortError(res, req, error, {
partialText,
conversationId,
const getAbortData = () => ({
sender,
conversationId,
messageId: responseMessageId,
parentMessageId: userMessageId ?? parentMessageId,
parentMessageId: overrideParentMessageId ?? userMessageId,
text: getPartialText(),
plugin: { ...plugin, loading: false },
userMessage,
promptTokens,
});
}
});
const { abortController, onStart } = createAbortController(req, res, getAbortData);
try {
endpointOption.tools = await validateTools(user, endpointOption.tools);
const { client } = await initializeClient({ req, res, endpointOption });
let response = await client.sendMessage(text, {
user,
generation,
isContinued,
isEdited: true,
conversationId,
parentMessageId,
responseMessageId,
overrideParentMessageId,
getReqData,
onAgentAction,
onChainEnd,
onStart,
...endpointOption,
onProgress: progressCallback.call(null, {
res,
text,
plugin,
parentMessageId: overrideParentMessageId || userMessageId,
}),
abortController,
});
if (overrideParentMessageId) {
response.parentMessageId = overrideParentMessageId;
}
logger.debug('[/edit/gptPlugins] CLIENT RESPONSE', response);
response.plugin = { ...plugin, loading: false };
await saveMessage({ ...response, user });
sendMessage(res, {
title: await getConvoTitle(user, conversationId),
final: true,
conversation: await getConvo(user, conversationId),
requestMessage: userMessage,
responseMessage: response,
});
res.end();
} catch (error) {
const partialText = getPartialText();
handleAbortError(res, req, error, {
partialText,
conversationId,
sender,
messageId: responseMessageId,
parentMessageId: userMessageId ?? parentMessageId,
});
}
},
);
module.exports = router;
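
For context, here is a minimal sketch of how createAbortController plausibly consumes getAbortData — the names match the route above, but the body is an assumption, not the middleware's actual implementation:

  // Hypothetical sketch; the real createAbortController middleware may differ.
  function createAbortControllerSketch(req, res, getAbortData) {
    const abortController = new AbortController();
    res.on('close', () => {
      if (!res.writableEnded) {
        const data = getAbortData(); // snapshot of partial text + plugin state
        abortController.abort();
        // ...persist `data` so the partial response can be recovered later
      }
    });
    const onStart = (userMessage) => {
      // ...register the in-flight request, e.g. keyed by userMessage.messageId
    };
    return { abortController, onStart };
  }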

View File

@@ -4,6 +4,7 @@ const { initializeClient } = require('~/server/services/Endpoints/openAI');
const {
handleAbort,
setHeaders,
validateModel,
validateEndpoint,
buildEndpointOption,
moderateText,
@@ -13,8 +14,15 @@ const router = express.Router();
router.use(moderateText);
router.post('/abort', handleAbort());
router.post('/', validateEndpoint, buildEndpointOption, setHeaders, async (req, res, next) => {
await EditController(req, res, next, initializeClient);
});
router.post(
'/',
validateEndpoint,
validateModel,
buildEndpointOption,
setHeaders,
async (req, res, next) => {
await EditController(req, res, next, initializeClient);
},
);
module.exports = router;
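
The route chain now runs validateModel ahead of buildEndpointOption. A minimal sketch of what such a middleware could look like — getModelsConfig is a stand-in name, and the overall shape is an assumption rather than the repository's actual implementation:

  // Hypothetical sketch of a model-validation middleware.
  async function validateModelSketch(req, res, next) {
    const { model, endpoint } = req.body;
    if (!model) {
      return res.status(400).json({ error: 'No model provided' });
    }
    // getModelsConfig is an assumed helper resolving { [endpoint]: string[] }
    const modelsConfig = (await getModelsConfig(req)) ?? {};
    const availableModels = modelsConfig[endpoint] ?? [];
    if (!availableModels.includes(model)) {
      return res.status(400).json({ error: `Illegal model request: ${model}` });
    }
    next();
  }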

View File

@@ -1,12 +1,13 @@
const axios = require('axios');
const fs = require('fs').promises;
const express = require('express');
const { isUUID } = require('librechat-data-provider');
const { isUUID, FileSources } = require('librechat-data-provider');
const {
filterFile,
processFileUpload,
processDeleteRequest,
} = require('~/server/services/Files/process');
const { initializeClient } = require('~/server/services/Endpoints/assistants');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { getFiles } = require('~/models/File');
const { logger } = require('~/config');
@@ -44,7 +45,7 @@ router.delete('/', async (req, res) => {
return false;
}
if (/^file-/.test(file.file_id)) {
if (/^(file|assistant)-/.test(file.file_id)) {
return true;
}
@@ -65,28 +66,64 @@ router.delete('/', async (req, res) => {
}
});
router.get('/download/:fileId', async (req, res) => {
router.get('/download/:userId/:filepath', async (req, res) => {
try {
const { fileId } = req.params;
const { userId, filepath } = req.params;
const options = {
headers: {
// TODO: Client initialization for OpenAI API Authentication
Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
},
responseType: 'stream',
};
const parts = filepath.split('/');
const file_id = parts[2];
const [file] = await getFiles({ file_id });
const errorPrefix = `File download requested by user ${userId}`;
if (userId !== req.user.id) {
logger.warn(`${errorPrefix} forbidden: ${file_id}`);
return res.status(403).send('Forbidden');
}
if (!file) {
logger.warn(`${errorPrefix} not found: ${file_id}`);
return res.status(404).send('File not found');
}
if (!file.filepath.includes(userId)) {
logger.warn(`${errorPrefix} forbidden: ${file_id}`);
return res.status(403).send('Forbidden');
}
if (file.source === FileSources.openai && !file.model) {
logger.warn(`${errorPrefix} has no associated model: ${file_id}`);
return res.status(400).send('The model used when creating this file is not available');
}
const { getDownloadStream } = getStrategyFunctions(file.source);
if (!getDownloadStream) {
logger.warn(`${errorPrefix} has no stream method implemented: ${file.source}`);
return res.status(501).send('Not Implemented');
}
const setHeaders = () => {
res.setHeader('Content-Disposition', `attachment; filename="${file.filename}"`);
res.setHeader('Content-Type', 'application/octet-stream');
res.setHeader('X-File-Metadata', JSON.stringify(file));
};
const fileResponse = await axios.get(`https://api.openai.com/v1/files/${fileId}`, {
headers: options.headers,
});
const { filename } = fileResponse.data;
const response = await axios.get(`https://api.openai.com/v1/files/${fileId}/content`, options);
res.setHeader('Content-Disposition', `attachment; filename="${filename}"`);
response.data.pipe(res);
/** @type {{ body: import('stream').PassThrough } | undefined} */
let passThrough;
/** @type {ReadableStream | undefined} */
let fileStream;
if (file.source === FileSources.openai) {
req.body = { model: file.model };
const { openai } = await initializeClient({ req, res });
passThrough = await getDownloadStream(file_id, openai);
setHeaders();
passThrough.body.pipe(res);
} else {
fileStream = getDownloadStream(file_id);
setHeaders();
fileStream.pipe(res);
}
} catch (error) {
console.error('Error downloading file:', error);
logger.error('Error downloading file:', error);
res.status(500).send('Error downloading file');
}
});
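
The download route dispatches on file.source via getStrategyFunctions, piping either the OpenAI SDK response body or a plain readable stream. A minimal sketch of that dispatch shape, assuming only local and OpenAI sources — the real module registers more strategies and methods:

  const fs = require('fs');

  // Hypothetical strategy table; method names mirror the route above.
  const strategies = {
    local: {
      // a Node ReadableStream for a file on disk
      getDownloadStream: (filepath) => fs.createReadStream(filepath),
    },
    openai: {
      // the SDK response, whose `.body` is a PassThrough-like stream
      getDownloadStream: (file_id, openai) => openai.files.content(file_id),
    },
  };

  function getStrategyFunctionsSketch(source) {
    return strategies[source] ?? {};
  }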

View File

@@ -15,6 +15,7 @@ const storage = multer.diskStorage({
},
filename: function (req, file, cb) {
req.file_id = crypto.randomUUID();
file.originalname = decodeURIComponent(file.originalname);
cb(null, `${file.originalname}`);
},
});
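
Decoding matters here because some clients percent-encode non-ASCII filenames in multipart uploads; a small illustration:

  // Illustrative only: a percent-encoded CJK filename round-trips cleanly.
  console.log(decodeURIComponent('%E6%96%87%E6%A1%A3.pdf')); // => '文档.pdf'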

View File

@@ -1,8 +1,8 @@
const express = require('express');
const router = express.Router();
const controller = require('../controllers/ModelController');
const { requireJwtAuth } = require('../middleware/');
const { modelController } = require('~/server/controllers/ModelController');
const { requireJwtAuth } = require('~/server/middleware/');
router.get('/', requireJwtAuth, controller);
const router = express.Router();
router.get('/', requireJwtAuth, modelController);
module.exports = router;

View File

@@ -1,18 +1,45 @@
const { AuthTypeEnum } = require('librechat-data-provider');
const { AuthTypeEnum, EModelEndpoint, actionDomainSeparator } = require('librechat-data-provider');
const { encryptV2, decryptV2 } = require('~/server/utils/crypto');
const { getActions } = require('~/models/Action');
const { logger } = require('~/config');
/**
* Parses the domain for an action.
*
* Azure OpenAI Assistants API doesn't support periods in function
* names due to `[a-zA-Z0-9_-]*` Regex Validation.
*
* @param {Express.Request} req - Express Request object
* @param {string} domain - The domain for the action
* @param {boolean} inverse - If true, replaces periods with `actionDomainSeparator`
* @returns {string} The parsed domain
*/
function domainParser(req, domain, inverse = false) {
if (!domain) {
return;
}
if (!req.app.locals[EModelEndpoint.azureOpenAI]?.assistants) {
return domain;
}
if (inverse) {
return domain.replace(/\./g, actionDomainSeparator);
}
return domain.split(actionDomainSeparator).join('.');
}
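
A round-trip illustration, assuming actionDomainSeparator is '---' (the actual value comes from librechat-data-provider) and an app configured for Azure assistants:

  // Assumed shapes for illustration only.
  const req = { app: { locals: { azureOpenAI: { assistants: true } } } };

  domainParser(req, 'api.example.com', true); // => 'api---example---com'
  domainParser(req, 'api---example---com');   // => 'api.example.com'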
/**
* Loads action sets based on the user and assistant ID.
*
* @param {Object} params - The parameters for loading action sets.
* @param {string} params.user - The user identifier.
* @param {string} params.assistant_id - The assistant identifier.
* @param {Object} searchParams - The parameters for loading action sets.
* @param {string} searchParams.user - The user identifier.
* @param {string} searchParams.assistant_id - The assistant identifier.
* @returns {Promise<Action[] | null>} A promise that resolves to an array of actions or `null` if no match.
*/
async function loadActionSets({ user, assistant_id }) {
return await getActions({ user, assistant_id }, true);
async function loadActionSets(searchParams) {
return await getActions(searchParams, true);
}
/**
@@ -40,7 +67,9 @@ function createActionTool({ action, requestBuilder }) {
logger.error(`API call to ${action.metadata.domain} failed`, error);
if (error.response) {
const { status, data } = error.response;
return `API call to ${action.metadata.domain} failed with status ${status}: ${data}`;
return `API call to ${
action.metadata.domain
} failed with status ${status}: ${JSON.stringify(data)}`;
}
return `API call to ${action.metadata.domain} failed.`;
@@ -115,4 +144,5 @@ module.exports = {
createActionTool,
encryptMetadata,
decryptMetadata,
domainParser,
};

View File

@@ -1,8 +1,14 @@
const {
FileSources,
EModelEndpoint,
Constants,
FileSources,
Capabilities,
EModelEndpoint,
defaultSocialLogins,
validateAzureGroups,
mapModelToAzureConfig,
assistantEndpointSchema,
deprecatedAzureVariables,
conflictingAzureVariables,
} = require('librechat-data-provider');
const { initializeFirebase } = require('./Files/Firebase/initialize');
const loadCustomConfig = require('./Config/loadCustomConfig');
@@ -62,31 +68,111 @@ const AppService = async (app) => {
handleRateLimits(config?.rateLimits);
const endpointLocals = {};
if (config?.endpoints?.[EModelEndpoint.assistants]) {
const { disableBuilder, pollIntervalMs, timeoutMs, supportedIds, excludedIds } =
config.endpoints[EModelEndpoint.assistants];
if (supportedIds?.length && excludedIds?.length) {
if (config?.endpoints?.[EModelEndpoint.azureOpenAI]) {
const { groups, ...azureConfiguration } = config.endpoints[EModelEndpoint.azureOpenAI];
const { isValid, modelNames, modelGroupMap, groupMap, errors } = validateAzureGroups(groups);
if (!isValid) {
const errorString = errors.join('\n');
const errorMessage = 'Invalid Azure OpenAI configuration:\n' + errorString;
logger.error(errorMessage);
throw new Error(errorMessage);
}
const assistantModels = [];
const assistantGroups = new Set();
for (const modelName of modelNames) {
mapModelToAzureConfig({ modelName, modelGroupMap, groupMap });
const groupName = modelGroupMap?.[modelName]?.group;
const modelGroup = groupMap?.[groupName];
let supportsAssistants = modelGroup?.assistants || modelGroup?.[modelName]?.assistants;
if (supportsAssistants) {
assistantModels.push(modelName);
!assistantGroups.has(groupName) && assistantGroups.add(groupName);
}
}
if (azureConfiguration.assistants && assistantModels.length === 0) {
throw new Error(
'No Azure models are configured to support assistants. Please remove the `assistants` field or configure at least one model to support assistants.',
);
}
endpointLocals[EModelEndpoint.azureOpenAI] = {
modelNames,
modelGroupMap,
groupMap,
assistantModels,
assistantGroups: Array.from(assistantGroups),
...azureConfiguration,
};
deprecatedAzureVariables.forEach(({ key, description }) => {
if (process.env[key]) {
logger.warn(
`The \`${key}\` environment variable (related to ${description}) should not be used in combination with the \`azureOpenAI\` endpoint configuration, as you will experience conflicts and errors.`,
);
}
});
conflictingAzureVariables.forEach(({ key }) => {
if (process.env[key]) {
logger.warn(
`The \`${key}\` environment variable should not be used in combination with the \`azureOpenAI\` endpoint configuration, as you may experience conflicts with the defined placeholders for mapping to the current model grouping using the same name.`,
);
}
});
if (azureConfiguration.assistants) {
endpointLocals[EModelEndpoint.assistants] = {
// Note: may need to add retrieval models here in the future
capabilities: [Capabilities.tools, Capabilities.actions, Capabilities.code_interpreter],
};
}
}
if (config?.endpoints?.[EModelEndpoint.assistants]) {
const assistantsConfig = config.endpoints[EModelEndpoint.assistants];
const parsedConfig = assistantEndpointSchema.parse(assistantsConfig);
if (assistantsConfig.supportedIds?.length && assistantsConfig.excludedIds?.length) {
logger.warn(
`Both \`supportedIds\` and \`excludedIds\` are defined for the ${EModelEndpoint.assistants} endpoint; \`excludedIds\` field will be ignored.`,
);
}
const prevConfig = endpointLocals[EModelEndpoint.assistants] ?? {};
/** @type {Partial<TAssistantEndpoint>} */
endpointLocals[EModelEndpoint.assistants] = {
disableBuilder,
pollIntervalMs,
timeoutMs,
supportedIds,
excludedIds,
...prevConfig,
retrievalModels: parsedConfig.retrievalModels,
disableBuilder: parsedConfig.disableBuilder,
pollIntervalMs: parsedConfig.pollIntervalMs,
supportedIds: parsedConfig.supportedIds,
capabilities: parsedConfig.capabilities,
excludedIds: parsedConfig.excludedIds,
timeoutMs: parsedConfig.timeoutMs,
};
}
try {
const response = await fetch(`${process.env.RAG_API_URL}/health`);
if (response?.ok && response?.status === 200) {
logger.info(`RAG API is running and reachable at ${process.env.RAG_API_URL}.`);
}
} catch (error) {
logger.warn(
`RAG API is either not running or not reachable at ${process.env.RAG_API_URL}, you may experience errors with file uploads.`,
);
}
app.locals = {
socialLogins,
availableTools,
fileStrategy,
fileConfig: config?.fileConfig,
interface: config?.interface,
paths,
...endpointLocals,
};

View File

@@ -1,4 +1,11 @@
const { FileSources, defaultSocialLogins } = require('librechat-data-provider');
const {
FileSources,
EModelEndpoint,
defaultSocialLogins,
validateAzureGroups,
deprecatedAzureVariables,
conflictingAzureVariables,
} = require('librechat-data-provider');
const AppService = require('./AppService');
@@ -32,6 +39,43 @@ jest.mock('./ToolService', () => ({
}),
}));
const azureGroups = [
{
group: 'librechat-westus',
apiKey: '${WESTUS_API_KEY}',
instanceName: 'librechat-westus',
version: '2023-12-01-preview',
models: {
'gpt-4-vision-preview': {
deploymentName: 'gpt-4-vision-preview',
version: '2024-02-15-preview',
},
'gpt-3.5-turbo': {
deploymentName: 'gpt-35-turbo',
},
'gpt-3.5-turbo-1106': {
deploymentName: 'gpt-35-turbo-1106',
},
'gpt-4': {
deploymentName: 'gpt-4',
},
'gpt-4-1106-preview': {
deploymentName: 'gpt-4-1106-preview',
},
},
},
{
group: 'librechat-eastus',
apiKey: '${EASTUS_API_KEY}',
instanceName: 'librechat-eastus',
deploymentName: 'gpt-4-turbo',
version: '2024-02-15-preview',
models: {
'gpt-4-turbo': true,
},
},
];
describe('AppService', () => {
let app;
@@ -122,11 +166,11 @@ describe('AppService', () => {
});
});
it('should correctly configure endpoints based on custom config', async () => {
it('should correctly configure Assistants endpoint based on custom config', async () => {
require('./Config/loadCustomConfig').mockImplementationOnce(() =>
Promise.resolve({
endpoints: {
assistants: {
[EModelEndpoint.assistants]: {
disableBuilder: true,
pollIntervalMs: 5000,
timeoutMs: 30000,
@@ -138,8 +182,8 @@ describe('AppService', () => {
await AppService(app);
expect(app.locals).toHaveProperty('assistants');
expect(app.locals.assistants).toEqual(
expect(app.locals).toHaveProperty(EModelEndpoint.assistants);
expect(app.locals[EModelEndpoint.assistants]).toEqual(
expect.objectContaining({
disableBuilder: true,
pollIntervalMs: 5000,
@@ -149,6 +193,34 @@ describe('AppService', () => {
);
});
it('should correctly configure Azure OpenAI endpoint based on custom config', async () => {
require('./Config/loadCustomConfig').mockImplementationOnce(() =>
Promise.resolve({
endpoints: {
[EModelEndpoint.azureOpenAI]: {
groups: azureGroups,
},
},
}),
);
process.env.WESTUS_API_KEY = 'westus-key';
process.env.EASTUS_API_KEY = 'eastus-key';
await AppService(app);
expect(app.locals).toHaveProperty(EModelEndpoint.azureOpenAI);
const azureConfig = app.locals[EModelEndpoint.azureOpenAI];
expect(azureConfig).toHaveProperty('modelNames');
expect(azureConfig).toHaveProperty('modelGroupMap');
expect(azureConfig).toHaveProperty('groupMap');
const { modelNames, modelGroupMap, groupMap } = validateAzureGroups(azureGroups);
expect(azureConfig.modelNames).toEqual(modelNames);
expect(azureConfig.modelGroupMap).toEqual(modelGroupMap);
expect(azureConfig.groupMap).toEqual(groupMap);
});
it('should not modify FILE_UPLOAD environment variables without rate limits', async () => {
// Setup initial environment variables
process.env.FILE_UPLOAD_IP_MAX = '10';
@@ -213,7 +285,7 @@ describe('AppService', () => {
});
});
describe('AppService updating app.locals', () => {
describe('AppService updating app.locals and issuing warnings', () => {
let app;
let initialEnv;
@@ -309,4 +381,56 @@ describe('AppService updating app.locals', () => {
expect.stringContaining('Both `supportedIds` and `excludedIds` are defined'),
);
});
it('should issue expected warnings when loading Azure Groups with deprecated Environment Variables', async () => {
require('./Config/loadCustomConfig').mockImplementationOnce(() =>
Promise.resolve({
endpoints: {
[EModelEndpoint.azureOpenAI]: {
groups: azureGroups,
},
},
}),
);
deprecatedAzureVariables.forEach((varInfo) => {
process.env[varInfo.key] = 'test';
});
const app = { locals: {} };
await require('./AppService')(app);
const { logger } = require('~/config');
deprecatedAzureVariables.forEach(({ key, description }) => {
expect(logger.warn).toHaveBeenCalledWith(
`The \`${key}\` environment variable (related to ${description}) should not be used in combination with the \`azureOpenAI\` endpoint configuration, as you will experience conflicts and errors.`,
);
});
});
it('should issue expected warnings when loading conflicting Azure Environment Variables', async () => {
require('./Config/loadCustomConfig').mockImplementationOnce(() =>
Promise.resolve({
endpoints: {
[EModelEndpoint.azureOpenAI]: {
groups: azureGroups,
},
},
}),
);
conflictingAzureVariables.forEach((varInfo) => {
process.env[varInfo.key] = 'test';
});
const app = { locals: {} };
await require('./AppService')(app);
const { logger } = require('~/config');
conflictingAzureVariables.forEach(({ key }) => {
expect(logger.warn).toHaveBeenCalledWith(
`The \`${key}\` environment variable should not be used in combination with the \`azureOpenAI\` endpoint configuration, as you may experience conflicts with the defined placeholders for mapping to the current model grouping using the same name.`,
);
});
});
});

View File

@@ -1,21 +1,19 @@
const path = require('path');
const { klona } = require('klona');
const {
StepTypes,
RunStatus,
StepStatus,
FilePurpose,
ContentTypes,
ToolCallTypes,
imageExtRegex,
imageGenTools,
EModelEndpoint,
defaultOrderQuery,
} = require('librechat-data-provider');
const { retrieveAndProcessFile } = require('~/server/services/Files/process');
const { RunManager, waitForRun, sleep } = require('~/server/services/Runs');
const { processRequiredActions } = require('~/server/services/ToolService');
const { createOnProgress, sendMessage } = require('~/server/utils');
const { createOnProgress, sendMessage, sleep } = require('~/server/utils');
const { RunManager, waitForRun } = require('~/server/services/Runs');
const { processMessages } = require('~/server/services/Threads');
const { TextStream } = require('~/app/clients');
const { logger } = require('~/config');
@@ -230,17 +228,13 @@ function createInProgressHandler(openai, thread_id, messages) {
const { file_id } = output.image;
const file = await retrieveAndProcessFile({
openai,
client: openai,
file_id,
basename: `${file_id}.png`,
});
// toolCall.asset_pointer = file.filepath;
const prelimImage = {
file_id,
filename: path.basename(file.filepath),
filepath: file.filepath,
height: file.height,
width: file.width,
};
const prelimImage = file;
// check if every key has a value before adding to content
const prelimImageKeys = Object.keys(prelimImage);
const validImageFile = prelimImageKeys.every((key) => prelimImage[key]);
@@ -286,6 +280,9 @@ function createInProgressHandler(openai, thread_id, messages) {
openai.seenCompletedMessages.add(message_id);
const message = await openai.beta.threads.messages.retrieve(thread_id, message_id);
if (!message?.content?.length) {
return;
}
messages.push(message);
let messageIndex = openai.mappedOrder.get(step.id);
@@ -296,7 +293,7 @@ function createInProgressHandler(openai, thread_id, messages) {
openai.index++;
}
const result = await processMessages(openai, [message]);
const result = await processMessages({ openai, client: openai, messages: [message] });
openai.addContentData({
[ContentTypes.TEXT]: { value: result.text },
type: ContentTypes.TEXT,
@@ -315,8 +312,8 @@ function createInProgressHandler(openai, thread_id, messages) {
res: openai.res,
index: messageIndex,
messageId: openai.responseMessage.messageId,
conversationId: openai.responseMessage.conversationId,
type: ContentTypes.TEXT,
stream: true,
thread_id,
});
@@ -413,7 +410,13 @@ async function runAssistant({
// const { messages: sortedMessages, text } = await processMessages(openai, messages);
// return { run, steps, messages: sortedMessages, text };
const sortedMessages = messages.sort((a, b) => a.created_at - b.created_at);
return { run, steps, messages: sortedMessages };
return {
run,
steps,
messages: sortedMessages,
finalMessage: openai.responseMessage,
text: openai.responseText,
};
}
const { submit_tool_outputs } = run.required_action;
@@ -444,98 +447,8 @@ async function runAssistant({
});
}
/**
* Sorts, processes, and flattens messages to a single string.
*
* @param {OpenAIClient} openai - The OpenAI client instance.
* @param {ThreadMessage[]} messages - An array of messages.
* @returns {Promise<{messages: ThreadMessage[], text: string}>} The sorted messages and the flattened text.
*/
async function processMessages(openai, messages = []) {
const sorted = messages.sort((a, b) => a.created_at - b.created_at);
let text = '';
for (const message of sorted) {
message.files = [];
for (const content of message.content) {
const processImageFile =
content.type === 'image_file' && !openai.processedFileIds.has(content.image_file?.file_id);
if (processImageFile) {
const { file_id } = content.image_file;
const file = await retrieveAndProcessFile({ openai, file_id, basename: `${file_id}.png` });
openai.processedFileIds.add(file_id);
message.files.push(file);
continue;
}
text += (content.text?.value ?? '') + ' ';
logger.debug('[processMessages] Processing message:', { value: text });
// Process annotations if they exist
if (!content.text?.annotations?.length) {
continue;
}
logger.debug('[processMessages] Processing annotations:', content.text.annotations);
for (const annotation of content.text.annotations) {
logger.debug('Current annotation:', annotation);
let file;
const processFilePath =
annotation.file_path && !openai.processedFileIds.has(annotation.file_path?.file_id);
if (processFilePath) {
const basename = imageExtRegex.test(annotation.text)
? path.basename(annotation.text)
: null;
file = await retrieveAndProcessFile({
openai,
file_id: annotation.file_path.file_id,
basename,
});
openai.processedFileIds.add(annotation.file_path.file_id);
}
const processFileCitation =
annotation.file_citation &&
!openai.processedFileIds.has(annotation.file_citation?.file_id);
if (processFileCitation) {
file = await retrieveAndProcessFile({
openai,
file_id: annotation.file_citation.file_id,
unknownType: true,
});
openai.processedFileIds.add(annotation.file_citation.file_id);
}
if (!file && (annotation.file_path || annotation.file_citation)) {
const { file_id } = annotation.file_citation || annotation.file_path || {};
file = await retrieveAndProcessFile({ openai, file_id, unknownType: true });
openai.processedFileIds.add(file_id);
}
if (!file) {
continue;
}
if (file.purpose && file.purpose === FilePurpose.Assistants) {
text = text.replace(annotation.text, file.filename);
} else if (file.filepath) {
text = text.replace(annotation.text, file.filepath);
}
message.files.push(file);
}
}
}
return { messages: sorted, text };
}
module.exports = {
getResponse,
runAssistant,
processMessages,
createOnTextProgress,
};

View File

@@ -1,6 +1,7 @@
const crypto = require('crypto');
const bcrypt = require('bcryptjs');
const { registerSchema, errorsToString } = require('~/strategies/validators');
const { errorsToString } = require('librechat-data-provider');
const { registerSchema } = require('~/strategies/validators');
const getCustomConfig = require('~/server/services/Config/getCustomConfig');
const Token = require('~/models/schema/tokenSchema');
const { sendEmail } = require('~/server/utils');
@@ -171,8 +172,10 @@ const requestPasswordReset = async (email) => {
user.email,
'Password Reset Request',
{
appName: process.env.APP_TITLE || 'LibreChat',
name: user.name,
link: link,
year: new Date().getFullYear(),
},
'requestPasswordReset.handlebars',
);
@@ -213,7 +216,9 @@ const resetPassword = async (userId, token, password) => {
user.email,
'Password Reset Successfully',
{
appName: process.env.APP_TITLE || 'LibreChat',
name: user.name,
year: new Date().getFullYear(),
},
'passwordReset.handlebars',
);

View File

@@ -1,4 +1,5 @@
const { EModelEndpoint } = require('librechat-data-provider');
const { isUserProvided, generateConfig } = require('~/server/utils');
const {
OPENAI_API_KEY: openAIApiKey,
@@ -9,17 +10,16 @@ const {
BINGAI_TOKEN: bingToken,
PLUGINS_USE_AZURE,
GOOGLE_KEY: googleKey,
OPENAI_REVERSE_PROXY,
AZURE_OPENAI_BASEURL,
ASSISTANTS_BASE_URL,
} = process.env ?? {};
const useAzurePlugins = !!PLUGINS_USE_AZURE;
const userProvidedOpenAI = useAzurePlugins
? azureOpenAIApiKey === 'user_provided'
: openAIApiKey === 'user_provided';
function isUserProvided(key) {
return key ? { userProvide: key === 'user_provided' } : false;
}
? isUserProvided(azureOpenAIApiKey)
: isUserProvided(openAIApiKey);
module.exports = {
config: {
@@ -28,11 +28,11 @@ module.exports = {
useAzurePlugins,
userProvidedOpenAI,
googleKey,
[EModelEndpoint.openAI]: isUserProvided(openAIApiKey),
[EModelEndpoint.assistants]: isUserProvided(assistantsApiKey),
[EModelEndpoint.azureOpenAI]: isUserProvided(azureOpenAIApiKey),
[EModelEndpoint.chatGPTBrowser]: isUserProvided(chatGPTToken),
[EModelEndpoint.anthropic]: isUserProvided(anthropicApiKey),
[EModelEndpoint.bingAI]: isUserProvided(bingToken),
[EModelEndpoint.openAI]: generateConfig(openAIApiKey, OPENAI_REVERSE_PROXY),
[EModelEndpoint.assistants]: generateConfig(assistantsApiKey, ASSISTANTS_BASE_URL, true),
[EModelEndpoint.azureOpenAI]: generateConfig(azureOpenAIApiKey, AZURE_OPENAI_BASEURL),
[EModelEndpoint.chatGPTBrowser]: generateConfig(chatGPTToken),
[EModelEndpoint.anthropic]: generateConfig(anthropicApiKey),
[EModelEndpoint.bingAI]: generateConfig(bingToken),
},
};
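
generateConfig replaces the deleted inline isUserProvided helper. A sketch of its plausible shape, inferred only from the call sites above — the real helper lives in ~/server/utils and may differ:

  // Hypothetical sketch: a falsy key disables the endpoint entirely.
  function generateConfigSketch(key, baseURL, assistants = false) {
    if (!key) {
      return false;
    }
    const config = { userProvide: key === 'user_provided' };
    if (baseURL) {
      config.userProvideURL = baseURL === 'user_provided';
    }
    if (assistants) {
      config.assistants = true;
    }
    return config;
  }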

View File

@@ -1,12 +1,16 @@
const { availableTools } = require('~/app/clients/tools');
const { EModelEndpoint } = require('librechat-data-provider');
const { addOpenAPISpecs } = require('~/app/clients/tools/util/addOpenAPISpecs');
const { openAIApiKey, azureOpenAIApiKey, useAzurePlugins, userProvidedOpenAI, googleKey } =
require('./EndpointService').config;
const { availableTools } = require('~/app/clients/tools');
const { isUserProvided } = require('~/server/utils');
const { config } = require('./EndpointService');
const { openAIApiKey, azureOpenAIApiKey, useAzurePlugins, userProvidedOpenAI, googleKey } = config;
/**
* Load async endpoints and return a configuration object
* @param {Express.Request} req - The request object
*/
async function loadAsyncEndpoints() {
async function loadAsyncEndpoints(req) {
let i = 0;
let serviceKey, googleUserProvides;
try {
@@ -17,7 +21,7 @@ async function loadAsyncEndpoints() {
}
}
if (googleKey === 'user_provided') {
if (isUserProvided(googleKey)) {
googleUserProvides = true;
if (i <= 1) {
i++;
@@ -35,13 +39,18 @@ async function loadAsyncEndpoints() {
const google = serviceKey || googleKey ? { userProvide: googleUserProvides } : false;
const useAzure = req.app.locals[EModelEndpoint.azureOpenAI]?.plugins;
const gptPlugins =
openAIApiKey || azureOpenAIApiKey
useAzure || openAIApiKey || azureOpenAIApiKey
? {
plugins,
availableAgents: ['classic', 'functions'],
userProvide: userProvidedOpenAI,
azure: useAzurePlugins,
userProvide: useAzure ? false : userProvidedOpenAI,
userProvideURL: useAzure
? false
: config[EModelEndpoint.openAI]?.userProvideURL ||
config[EModelEndpoint.azureOpenAI]?.userProvideURL,
azure: useAzurePlugins || useAzure,
}
: false;

View File

@@ -1,11 +1,13 @@
const { EModelEndpoint } = require('librechat-data-provider');
const { isUserProvided, extractEnvVariable } = require('~/server/utils');
const { EModelEndpoint, extractEnvVariable } = require('librechat-data-provider');
const { isUserProvided } = require('~/server/utils');
const getCustomConfig = require('./getCustomConfig');
/**
* Load config endpoints from the cached configuration object
* @function loadConfigEndpoints */
async function loadConfigEndpoints() {
* @param {Express.Request} req - The request object
* @returns {Promise<TEndpointsConfig>} A promise that resolves to an object containing the endpoints configuration
*/
async function loadConfigEndpoints(req) {
const customConfig = await getCustomConfig();
if (!customConfig) {
@@ -42,6 +44,20 @@ async function loadConfigEndpoints() {
}
}
if (req.app.locals[EModelEndpoint.azureOpenAI]) {
/** @type {Omit<TConfig, 'order'>} */
endpointsConfig[EModelEndpoint.azureOpenAI] = {
userProvide: false,
};
}
if (req.app.locals[EModelEndpoint.azureOpenAI]?.assistants) {
/** @type {Omit<TConfig, 'order'>} */
endpointsConfig[EModelEndpoint.assistants] = {
userProvide: false,
};
}
return endpointsConfig;
}

View File

@@ -1,6 +1,6 @@
const { EModelEndpoint } = require('librechat-data-provider');
const { isUserProvided, extractEnvVariable } = require('~/server/utils');
const { EModelEndpoint, extractEnvVariable } = require('librechat-data-provider');
const { fetchModels } = require('~/server/services/ModelService');
const { isUserProvided } = require('~/server/utils');
const getCustomConfig = require('./getCustomConfig');
/**
@@ -17,6 +17,21 @@ async function loadConfigModels(req) {
const { endpoints = {} } = customConfig ?? {};
const modelsConfig = {};
const azureEndpoint = endpoints[EModelEndpoint.azureOpenAI];
const azureConfig = req.app.locals[EModelEndpoint.azureOpenAI];
const { modelNames } = azureConfig ?? {};
if (modelNames && azureEndpoint) {
modelsConfig[EModelEndpoint.azureOpenAI] = modelNames;
}
if (modelNames && azureEndpoint && azureEndpoint.plugins) {
modelsConfig[EModelEndpoint.gptPlugins] = modelNames;
}
if (azureEndpoint?.assistants && azureConfig.assistantModels) {
modelsConfig[EModelEndpoint.assistants] = azureConfig.assistantModels;
}
if (!Array.isArray(endpoints[EModelEndpoint.custom])) {
return modelsConfig;
@@ -31,21 +46,34 @@ async function loadConfigModels(req) {
(endpoint.models.fetch || endpoint.models.default),
);
const fetchPromisesMap = {}; // Map for promises keyed by baseURL
const baseUrlToNameMap = {}; // Map to associate baseURLs with names
/**
* @type {Record<string, string[]>}
* Map for promises keyed by unique combination of baseURL and apiKey */
const fetchPromisesMap = {};
/**
* @type {Record<string, string[]>}
* Map to associate unique keys with endpoint names; note: one key may correspond to multiple endpoints */
const uniqueKeyToEndpointsMap = {};
/**
* @type {Record<string, Partial<TEndpoint>>}
* Map to associate endpoint names to their configurations */
const endpointsMap = {};
for (let i = 0; i < customEndpoints.length; i++) {
const endpoint = customEndpoints[i];
const { models, name, baseURL, apiKey } = endpoint;
endpointsMap[name] = endpoint;
const API_KEY = extractEnvVariable(apiKey);
const BASE_URL = extractEnvVariable(baseURL);
const uniqueKey = `${BASE_URL}__${API_KEY}`;
modelsConfig[name] = [];
if (models.fetch && !isUserProvided(API_KEY) && !isUserProvided(BASE_URL)) {
fetchPromisesMap[BASE_URL] =
fetchPromisesMap[BASE_URL] ||
fetchPromisesMap[uniqueKey] =
fetchPromisesMap[uniqueKey] ||
fetchModels({
user: req.user.id,
baseURL: BASE_URL,
@@ -53,8 +81,8 @@ async function loadConfigModels(req) {
name,
userIdQuery: models.userIdQuery,
});
baseUrlToNameMap[BASE_URL] = baseUrlToNameMap[BASE_URL] || [];
baseUrlToNameMap[BASE_URL].push(name);
uniqueKeyToEndpointsMap[uniqueKey] = uniqueKeyToEndpointsMap[uniqueKey] || [];
uniqueKeyToEndpointsMap[uniqueKey].push(name);
continue;
}
@@ -64,15 +92,16 @@ async function loadConfigModels(req) {
}
const fetchedData = await Promise.all(Object.values(fetchPromisesMap));
const baseUrls = Object.keys(fetchPromisesMap);
const uniqueKeys = Object.keys(fetchPromisesMap);
for (let i = 0; i < fetchedData.length; i++) {
const currentBaseUrl = baseUrls[i];
const currentKey = uniqueKeys[i];
const modelData = fetchedData[i];
const associatedNames = baseUrlToNameMap[currentBaseUrl];
const associatedNames = uniqueKeyToEndpointsMap[currentKey];
for (const name of associatedNames) {
modelsConfig[name] = modelData;
const endpoint = endpointsMap[name];
modelsConfig[name] = !modelData?.length ? endpoint.models.default ?? [] : modelData;
}
}
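
The switch from baseURL-keyed to `${BASE_URL}__${API_KEY}`-keyed promises means two endpoints sharing both values reuse a single fetch, while endpoints sharing a host but differing in key fetch separately. The core idea, reduced to a sketch with illustrative names:

  // Sketch of the dedup: one in-flight fetch per unique baseURL+apiKey pair.
  const fetchPromises = {};
  function fetchOnce(baseURL, apiKey, doFetch) {
    const uniqueKey = `${baseURL}__${apiKey}`;
    fetchPromises[uniqueKey] = fetchPromises[uniqueKey] || doFetch();
    return fetchPromises[uniqueKey];
  }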

View File

@@ -0,0 +1,329 @@
const { fetchModels } = require('~/server/services/ModelService');
const loadConfigModels = require('./loadConfigModels');
const getCustomConfig = require('./getCustomConfig');
jest.mock('~/server/services/ModelService');
jest.mock('./getCustomConfig');
const exampleConfig = {
endpoints: {
custom: [
{
name: 'Mistral',
apiKey: '${MY_PRECIOUS_MISTRAL_KEY}',
baseURL: 'https://api.mistral.ai/v1',
models: {
default: ['mistral-tiny', 'mistral-small', 'mistral-medium', 'mistral-large-latest'],
fetch: true,
},
dropParams: ['stop', 'user', 'frequency_penalty', 'presence_penalty'],
},
{
name: 'OpenRouter',
apiKey: '${MY_OPENROUTER_API_KEY}',
baseURL: 'https://openrouter.ai/api/v1',
models: {
default: ['gpt-3.5-turbo'],
fetch: true,
},
dropParams: ['stop'],
},
{
name: 'groq',
apiKey: 'user_provided',
baseURL: 'https://api.groq.com/openai/v1/',
models: {
default: ['llama2-70b-4096', 'mixtral-8x7b-32768'],
fetch: false,
},
},
{
name: 'Ollama',
apiKey: 'user_provided',
baseURL: 'http://localhost:11434/v1/',
models: {
default: ['mistral', 'llama2:13b'],
fetch: false,
},
},
],
},
};
describe('loadConfigModels', () => {
const mockRequest = { app: { locals: {} }, user: { id: 'testUserId' } };
const originalEnv = process.env;
beforeEach(() => {
jest.resetAllMocks();
jest.resetModules();
process.env = { ...originalEnv };
});
afterEach(() => {
process.env = originalEnv;
});
it('should return an empty object if customConfig is null', async () => {
getCustomConfig.mockResolvedValue(null);
const result = await loadConfigModels(mockRequest);
expect(result).toEqual({});
});
it('handles azure models and endpoint correctly', async () => {
mockRequest.app.locals.azureOpenAI = { modelNames: ['model1', 'model2'] };
getCustomConfig.mockResolvedValue({
endpoints: {
azureOpenAI: {
models: ['model1', 'model2'],
},
},
});
const result = await loadConfigModels(mockRequest);
expect(result.azureOpenAI).toEqual(['model1', 'model2']);
});
it('fetches custom models based on the unique key', async () => {
process.env.BASE_URL = 'http://example.com';
process.env.API_KEY = 'some-api-key';
const customEndpoints = {
custom: [
{
baseURL: '${BASE_URL}',
apiKey: '${API_KEY}',
name: 'CustomModel',
models: { fetch: true },
},
],
};
getCustomConfig.mockResolvedValue({ endpoints: customEndpoints });
fetchModels.mockResolvedValue(['customModel1', 'customModel2']);
const result = await loadConfigModels(mockRequest);
expect(fetchModels).toHaveBeenCalled();
expect(result.CustomModel).toEqual(['customModel1', 'customModel2']);
});
it('correctly associates models to names using unique keys', async () => {
getCustomConfig.mockResolvedValue({
endpoints: {
custom: [
{
baseURL: 'http://example.com',
apiKey: 'API_KEY1',
name: 'Model1',
models: { fetch: true },
},
{
baseURL: 'http://example.com',
apiKey: 'API_KEY2',
name: 'Model2',
models: { fetch: true },
},
],
},
});
fetchModels.mockImplementation(({ apiKey }) =>
Promise.resolve(apiKey === 'API_KEY1' ? ['model1Data'] : ['model2Data']),
);
const result = await loadConfigModels(mockRequest);
expect(result.Model1).toEqual(['model1Data']);
expect(result.Model2).toEqual(['model2Data']);
});
it('correctly handles multiple endpoints with the same baseURL but different apiKeys', async () => {
// Mock the custom configuration to simulate the user's scenario
getCustomConfig.mockResolvedValue({
endpoints: {
custom: [
{
name: 'LiteLLM',
apiKey: '${LITELLM_ALL_MODELS}',
baseURL: '${LITELLM_HOST}',
models: { fetch: true },
},
{
name: 'OpenAI',
apiKey: '${LITELLM_OPENAI_MODELS}',
baseURL: '${LITELLM_SECOND_HOST}',
models: { fetch: true },
},
{
name: 'Google',
apiKey: '${LITELLM_GOOGLE_MODELS}',
baseURL: '${LITELLM_SECOND_HOST}',
models: { fetch: true },
},
],
},
});
// Mock `fetchModels` to return different models based on the apiKey
fetchModels.mockImplementation(({ apiKey }) => {
switch (apiKey) {
case '${LITELLM_ALL_MODELS}':
return Promise.resolve(['AllModel1', 'AllModel2']);
case '${LITELLM_OPENAI_MODELS}':
return Promise.resolve(['OpenAIModel']);
case '${LITELLM_GOOGLE_MODELS}':
return Promise.resolve(['GoogleModel']);
default:
return Promise.resolve([]);
}
});
const result = await loadConfigModels(mockRequest);
// Assert that the models are correctly fetched and mapped based on unique keys
expect(result.LiteLLM).toEqual(['AllModel1', 'AllModel2']);
expect(result.OpenAI).toEqual(['OpenAIModel']);
expect(result.Google).toEqual(['GoogleModel']);
// Ensure that fetchModels was called with correct parameters
expect(fetchModels).toHaveBeenCalledTimes(3);
expect(fetchModels).toHaveBeenCalledWith(
expect.objectContaining({ apiKey: '${LITELLM_ALL_MODELS}' }),
);
expect(fetchModels).toHaveBeenCalledWith(
expect.objectContaining({ apiKey: '${LITELLM_OPENAI_MODELS}' }),
);
expect(fetchModels).toHaveBeenCalledWith(
expect.objectContaining({ apiKey: '${LITELLM_GOOGLE_MODELS}' }),
);
});
it('loads models based on custom endpoint configuration respecting fetch rules', async () => {
process.env.MY_PRECIOUS_MISTRAL_KEY = 'actual_mistral_api_key';
process.env.MY_OPENROUTER_API_KEY = 'actual_openrouter_api_key';
// Setup custom configuration with specific API keys for Mistral and OpenRouter
// and "user_provided" for groq and Ollama, indicating no fetch for the latter two
getCustomConfig.mockResolvedValue(exampleConfig);
// Assuming fetchModels would be called only for Mistral and OpenRouter
fetchModels.mockImplementation(({ name }) => {
switch (name) {
case 'Mistral':
return Promise.resolve([
'mistral-tiny',
'mistral-small',
'mistral-medium',
'mistral-large-latest',
]);
case 'OpenRouter':
return Promise.resolve(['gpt-3.5-turbo']);
default:
return Promise.resolve([]);
}
});
const result = await loadConfigModels(mockRequest);
// Since fetch is true and apiKey is not "user_provided", fetching occurs for Mistral and OpenRouter
expect(result.Mistral).toEqual([
'mistral-tiny',
'mistral-small',
'mistral-medium',
'mistral-large-latest',
]);
expect(fetchModels).toHaveBeenCalledWith(
expect.objectContaining({
name: 'Mistral',
apiKey: process.env.MY_PRECIOUS_MISTRAL_KEY,
}),
);
expect(result.OpenRouter).toEqual(['gpt-3.5-turbo']);
expect(fetchModels).toHaveBeenCalledWith(
expect.objectContaining({
name: 'OpenRouter',
apiKey: process.env.MY_OPENROUTER_API_KEY,
}),
);
// For groq and Ollama, since the apiKey is "user_provided", models should not be fetched
// Depending on your implementation's behavior regarding "default" models without fetching,
// you may need to adjust the following assertions:
expect(result.groq).toBe(exampleConfig.endpoints.custom[2].models.default);
expect(result.Ollama).toBe(exampleConfig.endpoints.custom[3].models.default);
// Verifying fetchModels was not called for groq and Ollama
expect(fetchModels).not.toHaveBeenCalledWith(
expect.objectContaining({
name: 'groq',
}),
);
expect(fetchModels).not.toHaveBeenCalledWith(
expect.objectContaining({
name: 'Ollama',
}),
);
});
it('falls back to default models if fetching returns an empty array', async () => {
getCustomConfig.mockResolvedValue({
endpoints: {
custom: [
{
name: 'EndpointWithSameFetchKey',
apiKey: 'API_KEY',
baseURL: 'http://example.com',
models: {
fetch: true,
default: ['defaultModel1'],
},
},
{
name: 'EmptyFetchModel',
apiKey: 'API_KEY',
baseURL: 'http://example.com',
models: {
fetch: true,
default: ['defaultModel1', 'defaultModel2'],
},
},
],
},
});
fetchModels.mockResolvedValue([]);
const result = await loadConfigModels(mockRequest);
expect(fetchModels).toHaveBeenCalledTimes(1);
expect(result.EmptyFetchModel).toEqual(['defaultModel1', 'defaultModel2']);
});
it('falls back to default models if fetching returns a falsy value', async () => {
getCustomConfig.mockResolvedValue({
endpoints: {
custom: [
{
name: 'FalsyFetchModel',
apiKey: 'API_KEY',
baseURL: 'http://example.com',
models: {
fetch: true,
default: ['defaultModel1', 'defaultModel2'],
},
},
],
},
});
fetchModels.mockResolvedValue(false);
const result = await loadConfigModels(mockRequest);
expect(fetchModels).toHaveBeenCalledWith(
expect.objectContaining({
name: 'FalsyFetchModel',
apiKey: 'API_KEY',
}),
);
expect(result.FalsyFetchModel).toEqual(['defaultModel1', 'defaultModel2']);
});
});

View File

@@ -1,11 +1,13 @@
const path = require('path');
const { CacheKeys, configSchema } = require('librechat-data-provider');
const getLogStores = require('~/cache/getLogStores');
const loadYaml = require('~/utils/loadYaml');
const { getLogStores } = require('~/cache');
const { logger } = require('~/config');
const axios = require('axios');
const yaml = require('js-yaml');
const projectRoot = path.resolve(__dirname, '..', '..', '..', '..');
const configPath = path.resolve(projectRoot, 'librechat.yaml');
const defaultConfigPath = path.resolve(projectRoot, 'librechat.yaml');
let i = 0;
@@ -16,19 +18,46 @@ let i = 0;
* @returns {Promise<TCustomConfig | null>} A promise that resolves to null or the custom config object.
* */
async function loadCustomConfig() {
const customConfig = loadYaml(configPath);
if (!customConfig) {
i === 0 &&
logger.info(
'Custom config file missing or YAML format invalid.\n\nCheck out the latest config file guide for configurable options and features.\nhttps://docs.librechat.ai/install/configuration/custom_config.html\n\n',
);
i === 0 && i++;
return null;
// Use CONFIG_PATH if set, otherwise fallback to defaultConfigPath
const configPath = process.env.CONFIG_PATH || defaultConfigPath;
let customConfig;
if (/^https?:\/\//.test(configPath)) {
try {
const response = await axios.get(configPath);
customConfig = response.data;
} catch (error) {
i === 0 && logger.error(`Failed to fetch the remote config file from ${configPath}`, error);
i === 0 && i++;
return null;
}
} else {
customConfig = loadYaml(configPath);
if (!customConfig) {
i === 0 &&
logger.info(
'Custom config file missing or YAML format invalid.\n\nCheck out the latest config file guide for configurable options and features.\nhttps://docs.librechat.ai/install/configuration/custom_config.html\n\n',
);
i === 0 && i++;
return null;
}
}
if (typeof customConfig === 'string') {
try {
customConfig = yaml.load(customConfig);
} catch (parseError) {
i === 0 && logger.info(`Failed to parse the YAML config from ${configPath}`, parseError);
i === 0 && i++;
return null;
}
}
const result = configSchema.strict().safeParse(customConfig);
if (!result.success) {
logger.error(`Invalid custom config file at ${configPath}`, result.error);
i === 0 && logger.error(`Invalid custom config file at ${configPath}`, result.error);
i === 0 && i++;
return null;
} else {
logger.info('Custom config file loaded:');
@@ -41,8 +70,6 @@ async function loadCustomConfig() {
await cache.set(CacheKeys.CUSTOM_CONFIG, customConfig);
}
// TODO: handle remote config
return customConfig;
}
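
With this change, CONFIG_PATH may point at either a local file or an http(s) URL; a minimal usage sketch (the URL is illustrative):

  // Select a remote config before boot; falls back to ./librechat.yaml.
  process.env.CONFIG_PATH = 'https://example.com/librechat.yaml';

  const loadCustomConfig = require('./loadCustomConfig');

  loadCustomConfig().then((config) => {
    // null when the fetch fails, YAML is invalid, or schema validation rejects
    console.log(config ? 'custom config loaded' : 'using defaults');
  });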

View File

@@ -0,0 +1,153 @@
jest.mock('axios');
jest.mock('~/cache/getLogStores');
jest.mock('~/utils/loadYaml');
const axios = require('axios');
const loadCustomConfig = require('./loadCustomConfig');
const getLogStores = require('~/cache/getLogStores');
const loadYaml = require('~/utils/loadYaml');
const { logger } = require('~/config');
describe('loadCustomConfig', () => {
const mockSet = jest.fn();
const mockCache = { set: mockSet };
beforeEach(() => {
jest.resetAllMocks();
delete process.env.CONFIG_PATH;
getLogStores.mockReturnValue(mockCache);
});
it('should return null and log error if remote config fetch fails', async () => {
process.env.CONFIG_PATH = 'http://example.com/config.yaml';
axios.get.mockRejectedValue(new Error('Network error'));
const result = await loadCustomConfig();
expect(logger.error).toHaveBeenCalledTimes(1);
expect(result).toBeNull();
});
it('should return null for an invalid local config file', async () => {
process.env.CONFIG_PATH = 'localConfig.yaml';
loadYaml.mockReturnValueOnce(null);
const result = await loadCustomConfig();
expect(result).toBeNull();
});
it('should parse, validate, and cache a valid local configuration', async () => {
const mockConfig = {
version: '1.0',
cache: true,
endpoints: {
custom: [
{
name: 'mistral',
apiKey: 'user_provided',
baseURL: 'https://api.mistral.ai/v1',
},
],
},
};
process.env.CONFIG_PATH = 'validConfig.yaml';
loadYaml.mockReturnValueOnce(mockConfig);
const result = await loadCustomConfig();
expect(result).toEqual(mockConfig);
expect(mockSet).toHaveBeenCalledWith(expect.anything(), mockConfig);
});
it('should return null and log if config schema validation fails', async () => {
const invalidConfig = { invalidField: true };
process.env.CONFIG_PATH = 'invalidConfig.yaml';
loadYaml.mockReturnValueOnce(invalidConfig);
const result = await loadCustomConfig();
expect(result).toBeNull();
});
it('should handle and return null on YAML parse error for a string response from remote', async () => {
process.env.CONFIG_PATH = 'http://example.com/config.yaml';
axios.get.mockResolvedValue({ data: 'invalidYAMLContent' });
const result = await loadCustomConfig();
expect(result).toBeNull();
});
it('should return the custom config object for a valid remote config file', async () => {
const mockConfig = {
version: '1.0',
cache: true,
endpoints: {
custom: [
{
name: 'mistral',
apiKey: 'user_provided',
baseURL: 'https://api.mistral.ai/v1',
},
],
},
};
process.env.CONFIG_PATH = 'http://example.com/config.yaml';
axios.get.mockResolvedValue({ data: mockConfig });
const result = await loadCustomConfig();
expect(result).toEqual(mockConfig);
expect(mockSet).toHaveBeenCalledWith(expect.anything(), mockConfig);
});
it('should return null if the remote config file is not found', async () => {
process.env.CONFIG_PATH = 'http://example.com/config.yaml';
axios.get.mockRejectedValue({ response: { status: 404 } });
const result = await loadCustomConfig();
expect(result).toBeNull();
});
it('should return null if the local config file is not found', async () => {
process.env.CONFIG_PATH = 'nonExistentConfig.yaml';
loadYaml.mockReturnValueOnce(null);
const result = await loadCustomConfig();
expect(result).toBeNull();
});
it('should not cache the config if cache is set to false', async () => {
const mockConfig = {
version: '1.0',
cache: false,
endpoints: {
custom: [
{
name: 'mistral',
apiKey: 'user_provided',
baseURL: 'https://api.mistral.ai/v1',
},
],
},
};
process.env.CONFIG_PATH = 'validConfig.yaml';
loadYaml.mockReturnValueOnce(mockConfig);
await loadCustomConfig();
expect(mockSet).not.toHaveBeenCalled();
});
it('should log the loaded custom config', async () => {
const mockConfig = {
version: '1.0',
cache: true,
endpoints: {
custom: [
{
name: 'mistral',
apiKey: 'user_provided',
baseURL: 'https://api.mistral.ai/v1',
},
],
},
};
process.env.CONFIG_PATH = 'validConfig.yaml';
loadYaml.mockReturnValueOnce(mockConfig);
await loadCustomConfig();
expect(logger.info).toHaveBeenCalledWith('Custom config file loaded:');
expect(logger.info).toHaveBeenCalledWith(JSON.stringify(mockConfig, null, 2));
expect(logger.debug).toHaveBeenCalledWith('Custom config:', mockConfig);
});
});

View File

@@ -1,34 +1,17 @@
const { EModelEndpoint } = require('librechat-data-provider');
const { EModelEndpoint, getEnabledEndpoints } = require('librechat-data-provider');
const loadAsyncEndpoints = require('./loadAsyncEndpoints');
const { config } = require('./EndpointService');
/**
* Load async endpoints and return a configuration object
* @function loadDefaultEndpointsConfig
* @param {Express.Request} req - The request object
* @returns {Promise<Object.<string, EndpointWithOrder>>} An object whose keys are endpoint names and values are objects that contain the endpoint configuration and an order.
*/
async function loadDefaultEndpointsConfig() {
const { google, gptPlugins } = await loadAsyncEndpoints();
async function loadDefaultEndpointsConfig(req) {
const { google, gptPlugins } = await loadAsyncEndpoints(req);
const { openAI, assistants, bingAI, anthropic, azureOpenAI, chatGPTBrowser } = config;
let enabledEndpoints = [
EModelEndpoint.openAI,
EModelEndpoint.assistants,
EModelEndpoint.azureOpenAI,
EModelEndpoint.google,
EModelEndpoint.bingAI,
EModelEndpoint.chatGPTBrowser,
EModelEndpoint.gptPlugins,
EModelEndpoint.anthropic,
];
const endpointsEnv = process.env.ENDPOINTS || '';
if (endpointsEnv) {
enabledEndpoints = endpointsEnv
.split(',')
.filter((endpoint) => endpoint?.trim())
.map((endpoint) => endpoint.trim());
}
const enabledEndpoints = getEnabledEndpoints();
const endpointConfig = {
[EModelEndpoint.openAI]: openAI,

View File

@@ -24,7 +24,7 @@ async function loadDefaultModels(req) {
azure: useAzurePlugins,
plugins: true,
});
const assistant = await getOpenAIModels({ assistants: true });
const assistants = await getOpenAIModels({ assistants: true });
return {
[EModelEndpoint.openAI]: openAI,
@@ -34,7 +34,7 @@ async function loadDefaultModels(req) {
[EModelEndpoint.azureOpenAI]: azureOpenAI,
[EModelEndpoint.bingAI]: ['BingAI', 'Sydney'],
[EModelEndpoint.chatGPTBrowser]: chatGPTBrowser,
[EModelEndpoint.assistants]: assistant,
[EModelEndpoint.assistants]: assistants,
};
}

View File

@@ -0,0 +1,32 @@
const { CacheKeys } = require('librechat-data-provider');
const getLogStores = require('~/cache/getLogStores');
const { isEnabled } = require('~/server/utils');
const { saveConvo } = require('~/models');
const addTitle = async (req, { text, response, client }) => {
const { TITLE_CONVO = 'true' } = process.env ?? {};
if (!isEnabled(TITLE_CONVO)) {
return;
}
if (client.options.titleConvo === false) {
return;
}
// If the request was aborted, don't generate the title.
if (client.abortController.signal.aborted) {
return;
}
const titleCache = getLogStores(CacheKeys.GEN_TITLE);
const key = `${req.user.id}-${response.conversationId}`;
const title = await client.titleConvo({ text, responseText: response?.text });
await titleCache.set(key, title, 120000);
await saveConvo(req.user.id, {
conversationId: response.conversationId,
title,
});
};
module.exports = addTitle;
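
A sketch of a typical call site, assuming a client returned by an endpoint initializer and a completed response message (names are illustrative):

  // Generate and cache a title once the response is saved.
  await addTitle(req, {
    text: userMessage.text,
    response: responseMessage,
    client,
  });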

View File

@@ -1,9 +1,10 @@
const buildOptions = (endpoint, parsedBody) => {
const { modelLabel, promptPrefix, ...rest } = parsedBody;
const { modelLabel, promptPrefix, resendFiles, ...rest } = parsedBody;
const endpointOption = {
endpoint,
modelLabel,
promptPrefix,
resendFiles,
modelOptions: {
...rest,
},

Some files were not shown because too many files have changed in this diff.