From 87bf7c0b6d2d67638686f242e8443437172a51ef Mon Sep 17 00:00:00 2001
From: Matt Steele
Date: Sat, 5 Aug 2023 19:14:19 -0400
Subject: [PATCH] feat: Adding buffers

---
 README.md            | 63 ++++++++++++++++++++++++++++++++--
 src/truncate.js      | 80 +++++++++++++++++++++++++++-----------------
 src/truncate.test.js | 49 +++++++++++++++++++++++++++
 3 files changed, 159 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index beb7cfe..dd18d57 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,53 @@ This package was written by an author who actively uses OpenAI and was running i
 npm i openai-tokens
 ```
 
-## Basic Usage
+## Use-Cases
+
+### Maintain Chat History
+
+Send as much conversation history as possible. When the history exceeds the model's limit, user/assistant pairs are removed (oldest first) until everything fits.
+
+```js
+// keep as much history as possible
+await fetch('https://api.openai.com/v1/chat/completions', {
+  body: JSON.stringify(truncateWrapper({
+    model: 'gpt-3.5-turbo',
+    opts: {
+      buffer: 1000 // give a buffer so GPT can respond!
+    },
+    messages: [{
+      role: 'system',
+      content: 'This should always be there!'
+    }, {
+      role: 'user', // This will be removed (too big), along with a paired assistant message
+      content: bigStr
+    }, {
+      role: 'assistant', // the pair that is removed
+      content: 'Just a small string (does not matter, because we remove in pairs)'
+    }, {
+      role: 'user',
+      content: 'Final user prompt'
+    }]
+  }))
+})
+```
+
+### Limit Embeddings
+
+Embedding inputs can be truncated the same way, so each input stays within the model's limit.
+
+```js
+// protect your requests from going over:
+await fetch('https://api.openai.com/v1/embeddings', {
+  method: 'POST',
+  body: JSON.stringify(truncateWrapper({
+    model: 'text-embedding-ada-002',
+    input: ['large data set, pretend this goes on for most of eternity...']
+  }))
+})
+```
+
+## Complete Usage
 
 ### Truncate
 
@@ -46,7 +92,12 @@ const truncatedBody = truncateWrapper({
   opts: {
     limit: 1000
   },
-  messages: [{ role: 'user', content: str }]
+  messages: [
+    { role: 'system', content: 'this will never truncate' },
+    { role: 'user', content: str },
+    { role: 'assistant', content: 'Removes in pairs, so this and the prior "user" message will be removed' },
+    { role: 'user', content: 'This will be preserved, because there is no matching "assistant" message.' }
+  ]
 })
 ```
 
@@ -94,6 +145,14 @@ console.log(promptInfo)
 
 ## Additional Information
 
+### Token Limits
+
+The model's token limit covers the prompt and the response together. If you want to leave room for the model to reply, set a `buffer` in `opts` and the prompt will be truncated that far below the model's limit.
+
+From ChatGPT directly:
+
+> Remember that very long conversations are more likely to receive incomplete replies. For example, if a conversation is 4090 tokens long, the reply will be cut off after only 6 tokens.
+
 ### Undetected Models
 
 If you provide a model that is not supported, you will get a console message as well as defaulted to `gpt-3.5-turbo`.
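+
+A rough sketch of that fallback (the model name below is invented; the call shape mirrors the examples above):
+
+```js
+// 'some-future-model' is not in the supported list, so a console message is
+// logged and the gpt-3.5-turbo limit is used for truncation instead
+const body = truncateWrapper({
+  model: 'some-future-model',
+  messages: [{ role: 'user', content: 'Hello!' }]
+})
+```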
diff --git a/src/truncate.js b/src/truncate.js
index c8e7552..39ccd1b 100644
--- a/src/truncate.js
+++ b/src/truncate.js
@@ -1,6 +1,11 @@
-const { getLimit, getTokens } = require('./utils')
+const { getLimit, getAllTokens } = require('./utils')
 const { encode, decode } = require('gpt-3-encoder')
 
+const getBodyLimit = (body = {}) => {
+  const limit = getLimit(body.opts?.limit || body.model)
+  return limit - (body.opts?.buffer || 0)
+}
+
 const truncateMessage = (content, limit) => {
   const forceLimit = getLimit(limit)
 
@@ -9,8 +14,9 @@ const truncateMessage = (content, limit) => {
   return decode(newEncoded)
 }
 
-const truncateEmbedding = (body = {}, limit) => {
-  const forceLimit = getLimit(limit || body.model)
+const truncateEmbedding = (originalBody = {}) => {
+  const { opts, ...body } = originalBody
+  const forceLimit = getBodyLimit(originalBody)
   if (Array.isArray(body.input)) {
     const newInput = []
     for (let i = 0; i < body.input.length; i++) {
@@ -28,43 +34,49 @@ const truncateEmbedding = (body = {}, limit) => {
   }
 }
 
-const truncateCompletion = (body = {}, limit) => {
-  const forceLimit = getLimit(limit || body.model)
+// recursively removes user/assistant pairs until the messages fit the limit
+const limitMessages = (messages, limit) => {
+  const total = getAllTokens({ messages })
+  if (total <= limit) {
+    return messages
+  }
 
-  // calculate all parts first...
-  let runningTotal = 0
-  const newMessages = body.messages.map(message => {
-    const tokens = getTokens(message.content)
-    runningTotal += tokens
+  // remove a pair: the oldest user and assistant messages, highest index first
+  const slices = [
+    messages.findIndex(m => m.role === 'user'),
+    messages.findIndex(m => m.role === 'assistant')
+  ].sort((a, b) => b - a)
 
-    return {
-      ...message,
-      tokens,
-      runningTotal
+  // both roles were found, so a full pair can be removed
+  if (slices.indexOf(-1) === -1) {
+    for (const slice of slices) {
+      messages.splice(slice, 1)
     }
-  })
+
+    // try again
+    return limitMessages(messages, limit)
+  }
+
+  console.warn('Unable to truncate any further. Prompts too large. Returning unresolvable.')
+  return messages
+}
+
+const truncateCompletion = (originalBody = {}) => {
+  const { opts, ...body } = originalBody
+  const forceLimit = getBodyLimit(originalBody)
+
+  const runningTotal = getAllTokens(body)
 
   // if its good, just send it off
-  // console.log('forceLimit', getTokens(body.messages[0].content))
-  // return forceLimit
   if (runningTotal <= forceLimit) {
     return body
   }
 
-  const bigIndex = newMessages.findIndex(m => m.runningTotal > forceLimit)
-  const newLimit = forceLimit - newMessages.slice(0, bigIndex).reduce((total, current) => total + current.tokens, 0)
-  const { role, content } = body.messages[bigIndex]
-
-  return ({
+  // deep clone so the caller's messages are not mutated, then limit
+  return {
     ...body,
-    messages: [
-      ...body.messages.slice(0, bigIndex),
-      {
-        role,
-        content: truncateMessage(content, newLimit)
-      }
-    ]
-  })
+    messages: limitMessages(JSON.parse(JSON.stringify(body.messages)), forceLimit)
+  }
 }
 
 /**
@@ -83,7 +95,13 @@ const truncateWrapper = (originalBody = {}, limit) => {
   }
   const { opts, ...body } = originalBody
   const fn = body.input ? truncateEmbedding : truncateCompletion
-  return fn(body, limit || opts?.limit)
+  return fn({
+    ...body,
+    opts: {
+      ...opts,
+      limit: limit || opts?.limit
+    }
+  })
 }
 
 module.exports = {
diff --git a/src/truncate.test.js b/src/truncate.test.js
index 24726c0..36a4b27 100644
--- a/src/truncate.test.js
+++ b/src/truncate.test.js
@@ -1,5 +1,6 @@
 const { truncateWrapper } = require('./truncate')
 
+const ten = 'this is 10 tokens long for reference okay? '
 const bigStr = 'so not even Matt can explore it '.repeat(650)
 const str = 'so not even Matt can explore it '.repeat(585) + 'so' // target (18722)
 
@@ -37,4 +38,52 @@ describe('truncateWrapper', () => {
 
     expect(response.input).toMatchObject(['so not', 'so not', 'small embed'])
   })
+
+  test('should truncate in pairs when they are too big', () => {
+    const response = truncateWrapper({
+      model: 'gpt-3.5-turbo',
+      messages: [{
+        role: 'system',
+        content: 'This should always be there!'
+      }, {
+        role: 'user',
+        content: bigStr
+      }, {
+        role: 'assistant',
+        content: 'Just a small string (does not matter, because we remove in pairs)'
+      }, {
+        role: 'user',
+        content: 'Final user prompt'
+      }]
+    })
+
+    expect(response.messages).toMatchObject([{
+      role: 'system',
+      content: 'This should always be there!'
+    }, {
+      role: 'user',
+      content: 'Final user prompt'
+    }])
+  })
+
+  test('should support buffers', () => {
+    const response = truncateWrapper({
+      model: 'gpt-3.5-turbo',
+      opts: {
+        buffer: 1000
+      },
+      messages: [
+        ...Array(500).fill({
+          role: 'user',
+          content: ten
+        }),
+        ...Array(500).fill({
+          role: 'assistant',
+          content: ten
+        })
+      ]
+    })
+
+    expect(response.messages.length).toBe(308)
+  })
 })
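+
+// A sketch of one more case that could be covered (assumes Jest's spyOn):
+// when only a system message is present and it is still over the limit,
+// limitMessages has no user/assistant pair to remove, so it warns and
+// returns the messages unchanged.
+test('should warn and return messages unchanged when nothing can be removed', () => {
+  const warn = jest.spyOn(console, 'warn').mockImplementation(() => {})
+
+  const response = truncateWrapper({
+    model: 'gpt-3.5-turbo',
+    messages: [{ role: 'system', content: bigStr }]
+  })
+
+  expect(warn).toHaveBeenCalled()
+  expect(response.messages).toMatchObject([{ role: 'system', content: bigStr }])
+
+  warn.mockRestore()
+})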