Merge pull request #5 from mrsteele/feature/buffer
feat: Adding buffers
mrsteele authored Aug 5, 2023
2 parents 72ceb7f + 87bf7c0 commit 605fa36
Showing 3 changed files with 159 additions and 33 deletions.
63 changes: 61 additions & 2 deletions README.md
@@ -18,7 +18,53 @@ This package was written by an author who actively uses OpenAI and was running i
npm i openai-tokens
```

## Basic Usage
## Use-Cases

### Maintain Chat History

Keep as much of the conversation history as the model's token limit allows; when it doesn't all fit, the oldest user/assistant pairs are dropped first.

```js
// keep as much history as possible
await fetch('https://api.openai.com/v1/chat/completions', {
  method: 'POST',
body: JSON.stringify(truncateWrapper({
model: 'gpt-3.5-turbo',
opts: {
buffer: 1000 // give a buffer so GPT can respond!
},
messages: [{
role: 'system',
content: 'This should always be there!'
}, {
role: 'user', // This will be removed (too big), along with a paired assistant message
content: bigStr
}, {
role: 'assistant', // the pair that is removed
content: 'Just a small string (does not matter, because we remove in pairs)'
}, {
role: 'user',
content: 'Final user prompt'
}]
}))
})
```
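
For reference, here is what the wrapper produces for a request like the one above: the oversized user message and its paired assistant reply are dropped, while the system prompt and the final user message survive. This mirrors the package's own test in `src/truncate.test.js`:

```js
const { truncateWrapper } = require('openai-tokens')

const bigStr = 'so not even Matt can explore it '.repeat(650) // far beyond the model limit

const truncated = truncateWrapper({
  model: 'gpt-3.5-turbo',
  messages: [{
    role: 'system',
    content: 'This should always be there!'
  }, {
    role: 'user',
    content: bigStr
  }, {
    role: 'assistant',
    content: 'Just a small string (does not matter, because we remove in pairs)'
  }, {
    role: 'user',
    content: 'Final user prompt'
  }]
})

console.log(truncated.messages)
// [
//   { role: 'system', content: 'This should always be there!' },
//   { role: 'user', content: 'Final user prompt' }
// ]
```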

### Limit Embeddings

Embedding inputs can be truncated the same way, so an oversized input never pushes a request past the model's limit.

```js
// protect your requests from going over:
await fetch('https://api.openai.com/v1/embeddings', {
method: 'POST',
body: JSON.stringify(truncateWrapper({
    model: 'text-embedding-ada-002',
    input: ['large data set, pretend this goes on for most of eternity...']
}))
})
```
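
You can also tighten the budget below the model's maximum with an explicit `limit`; each entry in the `input` array is truncated to that budget individually. A minimal sketch (the inputs are placeholders):

```js
const { truncateWrapper } = require('openai-tokens')

// cap every input at roughly 500 tokens instead of the model's full limit
const body = truncateWrapper({
  model: 'text-embedding-ada-002',
  opts: { limit: 500 },
  input: ['a very long document...', 'another very long document...']
})
```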

## Complete Usage

### Truncate

@@ -46,7 +92,12 @@ const truncatedBody = truncateWrapper({
opts: {
limit: 1000
},
messages: [{ role: 'user', content: str }]
messages: [
{ role: 'system', content: 'this will never truncate' },
{ role: 'user', content: str },
{ role: 'assistant', content: 'Removes in pairs, so this and the prior "user" message will be removed' },
{ role: 'user', content: 'This will be preserved, because there is no matching "assistant" message.' }
]
})
```

@@ -94,6 +145,14 @@ console.log(promptInfo)

## Additional Information

### Token Limits

This module respects each model's maximum token limit, and that limit covers the prompt and the completion combined. If you want the model to have room to respond, reserve part of the budget with the `buffer` option.

From ChatGPT directly:

> Remember that very long conversations are more likely to receive incomplete replies. For example, if a conversation is 4090 tokens long, the reply will be cut off after only 6 tokens.
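
A minimal sketch of reserving response room with `buffer` (the message content is a placeholder):

```js
const { truncateWrapper } = require('openai-tokens')

// the prompt is truncated to (model limit - 1000),
// leaving ~1000 tokens for the completion
const body = truncateWrapper({
  model: 'gpt-3.5-turbo',
  opts: { buffer: 1000 },
  messages: [{ role: 'user', content: 'a very long prompt...' }]
})
```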

### Undetected Models

If you provide a model that is not recognized, a console message is logged and the token limits default to those of `gpt-3.5-turbo`.
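
A quick sketch, with a deliberately made-up model name:

```js
const { truncateWrapper } = require('openai-tokens')

// 'my-future-model' is not recognized, so a console message is
// logged and gpt-3.5-turbo's token limits are used instead
const body = truncateWrapper({
  model: 'my-future-model',
  messages: [{ role: 'user', content: 'hello' }]
})
```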
80 changes: 49 additions & 31 deletions src/truncate.js
@@ -1,6 +1,11 @@
const { getLimit, getTokens } = require('./utils')
const { getLimit, getAllTokens } = require('./utils')
const { encode, decode } = require('gpt-3-encoder')

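// effective token budget: an explicit opts.limit (or the model's max) minus any reserved opts.buffer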
const getBodyLimit = (body = {}) => {
const limit = getLimit(body.opts?.limit || body.model)
return limit - (body.opts?.buffer || 0)
}

const truncateMessage = (content, limit) => {
const forceLimit = getLimit(limit)

@@ -9,8 +14,9 @@ const truncateMessage = (content, limit) => {
return decode(newEncoded)
}

const truncateEmbedding = (body = {}, limit) => {
const forceLimit = getLimit(limit || body.model)
const truncateEmbedding = (originalBody = {}) => {
const { opts, ...body } = originalBody
const forceLimit = getBodyLimit(originalBody)
if (Array.isArray(body.input)) {
const newInput = []
for (let i = 0; i < body.input.length; i++) {
@@ -28,43 +34,49 @@ const truncateEmbedding = (body = {}, limit) => {
}
}

const truncateCompletion = (body = {}, limit) => {
const forceLimit = getLimit(limit || body.model)
// uses recursion: removes user/assistant pairs until the messages fit under the limit
const limitMessages = (messages, limit) => {
const total = getAllTokens({ messages })
if (total <= limit) {
return messages
}

// calculate all parts first...
let runningTotal = 0
const newMessages = body.messages.map(message => {
const tokens = getTokens(message.content)
runningTotal += tokens
  // remove a pair: the first user message and the first assistant message
  const slices = [
    messages.findIndex(m => m.role === 'user'),
    messages.findIndex(m => m.role === 'assistant')
  ].sort((a, b) => b - a) // numeric sort, larger index first so the splices stay valid

return {
...message,
tokens,
runningTotal
  // neither index is -1: both roles exist, so the pair can be removed
if (slices.indexOf(-1) === -1) {
for (const slice of slices) {
messages.splice(slice, 1)
}
})

// try again
return limitMessages(messages, limit)
}

console.warn('Unable to truncate any further. Prompts too large. Returning unresolvable.')
return messages
}

const truncateCompletion = (originalBody = {}) => {
const { opts, ...body } = originalBody
const forceLimit = getBodyLimit(originalBody)

const runningTotal = getAllTokens(body)

  // if it's already within the limit, send it off unchanged
if (runningTotal <= forceLimit) {
return body
}

const bigIndex = newMessages.findIndex(m => m.runningTotal > forceLimit)
const newLimit = forceLimit - newMessages.slice(0, bigIndex).reduce((total, current) => total + current.tokens, 0)
const { role, content } = body.messages[bigIndex]

return ({
// clone and limit
return {
...body,
messages: [
...body.messages.slice(0, bigIndex),
{
role,
content: truncateMessage(content, newLimit)
}
]
})
messages: limitMessages(JSON.parse(JSON.stringify(body.messages)), forceLimit)
}
}

/**
@@ -83,7 +95,13 @@ const truncateWrapper = (originalBody = {}, limit) => {
}
const { opts, ...body } = originalBody
const fn = body.input ? truncateEmbedding : truncateCompletion
return fn(body, limit || opts?.limit)
return fn({
...body,
opts: {
...opts,
limit: limit || opts?.limit
}
})
}

module.exports = {
49 changes: 49 additions & 0 deletions src/truncate.test.js
@@ -1,5 +1,6 @@
const { truncateWrapper } = require('./truncate')

const ten = 'this is 10 tokens long for reference okay? '
const bigStr = 'so not even Matt can explore it '.repeat(650)
const str = 'so not even Matt can explore it '.repeat(585) + 'so'
// target (18722)
@@ -37,4 +38,52 @@ describe('truncateWrapper', () => {

expect(response.input).toMatchObject(['so not', 'so not', 'small embed'])
})

test('should truncate in pairs when they are too big', () => {
const response = truncateWrapper({
model: 'gpt-3.5-turbo',
messages: [{
role: 'system',
content: 'This should always be there!'
}, {
role: 'user',
content: bigStr
}, {
role: 'assistant',
content: 'Just a small string (does not matter, because we remove in pairs)'
}, {
role: 'user',
content: 'Final user prompt'
}]
})

expect(response.messages).toMatchObject([{
role: 'system',
content: 'This should always be there!'
}, {
role: 'user',
content: 'Final user prompt'
}])
})

test('should support buffers', () => {
const response = truncateWrapper({
model: 'gpt-3.5-turbo',
opts: {
buffer: 1000
},
messages: [
...Array(500).fill({
role: 'user',
content: ten
}),
...Array(500).fill({
role: 'assistant',
content: ten
})
]
})

expect(response.messages.length).toBe(308)
})
})
