From 87bf7c0b6d2d67638686f242e8443437172a51ef Mon Sep 17 00:00:00 2001
From: Matt Steele
Date: Sat, 5 Aug 2023 19:14:19 -0400
Subject: [PATCH] feat: Adding buffers

---
 README.md            | 63 ++++++++++++++++++++++++++++++++--
 src/truncate.js      | 80 +++++++++++++++++++++++++++-----------------
 src/truncate.test.js | 49 +++++++++++++++++++++++++++
 3 files changed, 159 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index beb7cfe..dd18d57 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,53 @@ This package was written by an author who actively uses OpenAI and was running i
 npm i openai-tokens
 ```
 
-## Basic Usage
+## Use-Cases
+
+### Maintain Chat History
+
+Send as much conversation history as possible. When the history exceeds the model's limit, user/assistant pairs are removed (oldest first) until everything fits.
+
+```js
+// keep as much history as possible
+await fetch('https://api.openai.com/v1/chat/completions', {
+  body: JSON.stringify(truncateWrapper({
+    model: 'gpt-3.5-turbo',
+    opts: {
+      buffer: 1000 // give a buffer so GPT can respond!
+    },
+    messages: [{
+      role: 'system',
+      content: 'This should always be there!'
+    }, {
+      role: 'user', // This will be removed (too big), along with a paired assistant message
+      content: bigStr
+    }, {
+      role: 'assistant', // the pair that is removed
+      content: 'Just a small string (does not matter, because we remove in pairs)'
+    }, {
+      role: 'user',
+      content: 'Final user prompt'
+    }]
+  }))
+})
+```
+
+### Limit Embeddings
+
+Embedding inputs can be truncated the same way, so each input stays within the model's limit.
+
+```js
+// protect your requests from going over:
+await fetch('https://api.openai.com/v1/embeddings', {
+  method: 'POST',
+  body: JSON.stringify(truncateWrapper({
+    model: 'text-embedding-ada-002',
+    input: ['large data set, pretend this goes on for most of eternity...']
+  }))
+})
+```
+
+## Complete Usage
 
 ### Truncate
 
@@ -46,7 +92,12 @@ const truncatedBody = truncateWrapper({
   opts: {
     limit: 1000
   },
-  messages: [{ role: 'user', content: str }]
+  messages: [
+    { role: 'system', content: 'this will never truncate' },
+    { role: 'user', content: str },
+    { role: 'assistant', content: 'Removes in pairs, so this and the prior "user" message will be removed' },
+    { role: 'user', content: 'This will be preserved, because there is no matching "assistant" message.' }
+  ]
 })
 ```
 
@@ -94,6 +145,14 @@ console.log(promptInfo)
 
 ## Additional Information
 
+### Token Limits
+
+The model's token limit covers the prompt and the response together. If you want to leave room for the model to reply, set a `buffer` in `opts` and the prompt will be truncated that far below the model's limit.
+
+From ChatGPT directly:
+
+> Remember that very long conversations are more likely to receive incomplete replies. For example, if a conversation is 4090 tokens long, the reply will be cut off after only 6 tokens.
+
 ### Undetected Models
 
 If you provide a model that is not supported, you will get a console message as well as defaulted to `gpt-3.5-turbo`.
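+
+A rough sketch of that fallback (the model name below is invented; the call shape mirrors the examples above):
+
+```js
+// 'some-future-model' is not in the supported list, so a console message is
+// logged and the gpt-3.5-turbo limit is used for truncation instead
+const body = truncateWrapper({
+  model: 'some-future-model',
+  messages: [{ role: 'user', content: 'Hello!' }]
+})
+```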
diff --git a/src/truncate.js b/src/truncate.js
index c8e7552..39ccd1b 100644
--- a/src/truncate.js
+++ b/src/truncate.js
@@ -1,6 +1,11 @@
-const { getLimit, getTokens } = require('./utils')
+const { getLimit, getAllTokens } = require('./utils')
 const { encode, decode } = require('gpt-3-encoder')
 
+const getBodyLimit = (body = {}) => {
+  const limit = getLimit(body.opts?.limit || body.model)
+  return limit - (body.opts?.buffer || 0)
+}
+
 const truncateMessage = (content, limit) => {
   const forceLimit = getLimit(limit)
 
@@ -9,8 +14,9 @@ const truncateMessage = (content, limit) => {
   return decode(newEncoded)
 }
 
-const truncateEmbedding = (body = {}, limit) => {
-  const forceLimit = getLimit(limit || body.model)
+const truncateEmbedding = (originalBody = {}) => {
+  const { opts, ...body } = originalBody
+  const forceLimit = getBodyLimit(originalBody)
   if (Array.isArray(body.input)) {
     const newInput = []
     for (let i = 0; i < body.input.length; i++) {
@@ -28,43 +34,49 @@ const truncateEmbedding = (body = {}, limit) => {
   }
 }
 
-const truncateCompletion = (body = {}, limit) => {
-  const forceLimit = getLimit(limit || body.model)
+// recursively removes user/assistant pairs until the messages fit the limit
+const limitMessages = (messages, limit) => {
+  const total = getAllTokens({ messages })
+  if (total <= limit) {
+    return messages
+  }
 
-  // calculate all parts first...
-  let runningTotal = 0
-  const newMessages = body.messages.map(message => {
-    const tokens = getTokens(message.content)
-    runningTotal += tokens
+  // remove a pair: the oldest user and assistant messages, highest index first
+  const slices = [
+    messages.findIndex(m => m.role === 'user'),
+    messages.findIndex(m => m.role === 'assistant')
+  ].sort((a, b) => b - a)
 
-    return {
-      ...message,
-      tokens,
-      runningTotal
+  // both roles were found, so a full pair can be removed
+  if (slices.indexOf(-1) === -1) {
+    for (const slice of slices) {
+      messages.splice(slice, 1)
     }
-  })
+
+    // try again
+    return limitMessages(messages, limit)
+  }
+
+  console.warn('Unable to truncate any further. Prompts too large. Returning unresolvable.')
+  return messages
+}
+
+const truncateCompletion = (originalBody = {}) => {
+  const { opts, ...body } = originalBody
+  const forceLimit = getBodyLimit(originalBody)
+
+  const runningTotal = getAllTokens(body)
 
   // if its good, just send it off
-  // console.log('forceLimit', getTokens(body.messages[0].content))
-  // return forceLimit
   if (runningTotal <= forceLimit) {
     return body
   }
 
-  const bigIndex = newMessages.findIndex(m => m.runningTotal > forceLimit)
-  const newLimit = forceLimit - newMessages.slice(0, bigIndex).reduce((total, current) => total + current.tokens, 0)
-  const { role, content } = body.messages[bigIndex]
-
-  return ({
+  // deep clone so the caller's messages are not mutated, then limit
+  return {
     ...body,
-    messages: [
-      ...body.messages.slice(0, bigIndex),
-      {
-        role,
-        content: truncateMessage(content, newLimit)
-      }
-    ]
-  })
+    messages: limitMessages(JSON.parse(JSON.stringify(body.messages)), forceLimit)
+  }
 }
 
 /**
@@ -83,7 +95,13 @@ const truncateWrapper = (originalBody = {}, limit) => {
   }
   const { opts, ...body } = originalBody
   const fn = body.input ? truncateEmbedding : truncateCompletion
-  return fn(body, limit || opts?.limit)
+  return fn({
+    ...body,
+    opts: {
+      ...opts,
+      limit: limit || opts?.limit
+    }
+  })
 }
 
 module.exports = {
diff --git a/src/truncate.test.js b/src/truncate.test.js
index 24726c0..36a4b27 100644
--- a/src/truncate.test.js
+++ b/src/truncate.test.js
@@ -1,5 +1,6 @@
 const { truncateWrapper } = require('./truncate')
 
+const ten = 'this is 10 tokens long for reference okay? '
 const bigStr = 'so not even Matt can explore it '.repeat(650)
 const str = 'so not even Matt can explore it '.repeat(585) + 'so' // target (18722)
 
@@ -37,4 +38,52 @@ describe('truncateWrapper', () => {
 
     expect(response.input).toMatchObject(['so not', 'so not', 'small embed'])
   })
+
+  test('should truncate in pairs when they are too big', () => {
+    const response = truncateWrapper({
+      model: 'gpt-3.5-turbo',
+      messages: [{
+        role: 'system',
+        content: 'This should always be there!'
+      }, {
+        role: 'user',
+        content: bigStr
+      }, {
+        role: 'assistant',
+        content: 'Just a small string (does not matter, because we remove in pairs)'
+      }, {
+        role: 'user',
+        content: 'Final user prompt'
+      }]
+    })
+
+    expect(response.messages).toMatchObject([{
+      role: 'system',
+      content: 'This should always be there!'
+    }, {
+      role: 'user',
+      content: 'Final user prompt'
+    }])
+  })
+
+  test('should support buffers', () => {
+    const response = truncateWrapper({
+      model: 'gpt-3.5-turbo',
+      opts: {
+        buffer: 1000
+      },
+      messages: [
+        ...Array(500).fill({
+          role: 'user',
+          content: ten
+        }),
+        ...Array(500).fill({
+          role: 'assistant',
+          content: ten
+        })
+      ]
+    })
+
+    expect(response.messages.length).toBe(308)
+  })
 })
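+
+// A sketch of one more case that could be covered (assumes Jest's spyOn):
+// when only a system message is present and it is still over the limit,
+// limitMessages has no user/assistant pair to remove, so it warns and
+// returns the messages unchanged.
+test('should warn and return messages unchanged when nothing can be removed', () => {
+  const warn = jest.spyOn(console, 'warn').mockImplementation(() => {})
+
+  const response = truncateWrapper({
+    model: 'gpt-3.5-turbo',
+    messages: [{ role: 'system', content: bigStr }]
+  })
+
+  expect(warn).toHaveBeenCalled()
+  expect(response.messages).toMatchObject([{ role: 'system', content: bigStr }])
+
+  warn.mockRestore()
+})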