diff --git a/README.md b/README.md
index ed92fdc..5de96ca 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # openai-tokens

 Accurate token measurement and truncation for OpenAI GPT prompts and embeddings.

-[![codecov](https://codecov.io/gh/mrsteele/openai-tokens/branch/main/graph/badge.svg?token=NCG32SMS6Z)](https://codecov.io/gh/mrsteele/openai-tokens)
+[![npm version](https://badge.fury.io/js/openai-tokens.svg)](https://badge.fury.io/js/openai-tokens) [![codecov](https://codecov.io/gh/mrsteele/openai-tokens/branch/main/graph/badge.svg?token=NCG32SMS6Z)](https://codecov.io/gh/mrsteele/openai-tokens)

 ## Features
@@ -9,6 +9,7 @@ This package was written by an author who actively uses OpenAI and was running i

 - 🏃 **FAST** - If you need to run a calculation or truncation quickly, this is the module for you!
 - 🎯 **Accurate** - This module is arguably the MOST accurate utility, using js-tiktoken which matches exact models.
+- 💰 **Cost-Efficient** - Use the dynamic wrapper to pick an appropriately sized model for each prompt.
 - 😌 **Seamless** - Integration should be simple. Wrappers make this accessible.
 - 🔒 **Secure** - Your data is yours, this library just wants to help.
@@ -25,22 +26,27 @@
 npm i openai-tokens
 ```

 If you have too much content in your request, you can change your model dynamically so you use an appropriate size for each request.

 ```js
-const { validationWrapper } = require('openai-tokens')
+const { dynamicWrapper } = require('openai-tokens')

 const chat = async (messages = []) => {
-  // the planned request
-  const request = { model: 'gpt-3.5-turbo', messages }
-  // if the request is invalid, bump it up
-  const model = validateWrapper(request).valid ? 'gpt-3.5-turbo' : 'gpt-3.5-turbo-16k'
-  // override the model to what was programmatically determined
   const body = await fetch('https://api.openai.com/v1/chat/completions', {
     method: 'POST',
-    body: truncateWrapper({ ...request, model }),
-    headers: {
-      Authorization: `Bearer ${OPENAI_KEY}`,
-      'Content-Type': 'application/json'
-    }
-  })
+    // wrap your original content with minor adjustments
+    body: dynamicWrapper({
+      // models are tested in order until one fits the prompt size
+      model: ['gpt-3.5-turbo', 'gpt-3.5-turbo-16k'],
+      messages: [{
+        role: 'user',
+        content: 'This prompt is small, so it is going to go with the first one'
+      }],
+      // optional arguments we can also pass in
+      opts: {
+        buffer: 1000, // add a buffer to make sure GPT can respond
+        stringify: true // return the results as a string
+      }
+    }),
+    headers: {
+      Authorization: `Bearer ${OPENAI_KEY}`,
+      'Content-Type': 'application/json'
+    }
+  })
+}
 ```
@@ -178,6 +184,39 @@
 console.log(promptInfo)
 ```

+#### Options
+
+You can pass options to the validate wrapper as shown in the examples above. The following options are currently supported:
+
+* **limit** (Int) - The token limit you want to enforce on the messages/input. The limit applies to the aggregated total for messages (GPT/completions) and to each individual input for embeddings, which matches how OpenAI calculates them. Defaults to the model maximum.
+* **buffer** (Int) - The number of tokens to hold back from the limit. The math equates to `max = limit - buffer`. Defaults to `0`.
+
+### Dynamic
+
+A dynamic router is provided for convenience. It allows you to pass multiple models; the module chooses the first model that fits the prompt, so you can always use the smallest one possible (and save some money 💰).
+
+```js
+const { dynamicWrapper } = require('openai-tokens')
+
+const chat = async (messages = []) => {
+  const body = await fetch('https://api.openai.com/v1/chat/completions', {
+    method: 'POST',
+    // wrap your original content with minor adjustments
+    body: dynamicWrapper({
+      // smallest to largest, you decide what sizes you want to support
+      model: ['gpt-3.5-turbo', 'gpt-3.5-turbo-16k', 'gpt-4-32k'],
+      messages: [{
+        role: 'user',
+        content: 'This prompt is small, so it is going to go with the first one'
+      }],
+      // optional arguments we can also pass in
+      opts: {
+        buffer: 1000, // add a buffer to make sure GPT can respond
+        stringify: true // return the results as a string
+      }
+    }),
+    headers: {
+      Authorization: `Bearer ${OPENAI_KEY}`,
+      'Content-Type': 'application/json'
+    }
+  })
+}
+```
+
 ## Additional Information

 ### Token Limits
@@ -196,6 +235,42 @@ In working on this module, accuracy was a challenge due to the fact that each mo

 If you provide a model that is not supported, you will get a console message as well as defaulted to `gpt-3.5-turbo`.

+### Can you use multiple wrappers together?!
+
+YES! A good example of this would be using the `dynamicWrapper` and the `truncateWrapper` together, like so:
+
+```js
+const { dynamicWrapper, truncateWrapper } = require('openai-tokens')
+
+const chat = async (messages = []) => {
+  const body = await fetch('https://api.openai.com/v1/chat/completions', {
+    method: 'POST',
+    body: truncateWrapper({
+      // first we pick a valid model for the prompt
+      ...dynamicWrapper({
+        model: ['gpt-3.5-turbo', 'gpt-3.5-turbo-16k'],
+        messages: [{
+          role: 'user',
+          content: 'pretend this is huge...'
+        }],
+        // these are suppressed in the output
+        opts: {
+          buffer: 1000
+        }
+      }),
+      // opts are not returned from dynamicWrapper, so add them back
+      opts: {
+        buffer: 1000,
+        stringify: true
+      }
+    }),
+    headers: {
+      Authorization: `Bearer ${OPENAI_KEY}`,
+      'Content-Type': 'application/json'
+    }
+  })
+}
+```
+
+#### Options
+
+* **buffer** (Int) - The number of tokens to hold back from the limit. The math equates to `max = limit - buffer`. Defaults to `0`.
+* **stringify** (Bool) - Set to `true` to return the output as a JSON string instead of a parsed object. Defaults to `false`.
+
 ### Supported Models

 The following models are supported. Plenty more available upon request (in fact, feel free to submit a PR and become a contributor!)
diff --git a/src/dynamic.js b/src/dynamic.js
new file mode 100644
index 0000000..67cfb14
--- /dev/null
+++ b/src/dynamic.js
@@ -0,0 +1,19 @@
+const { validateWrapper } = require('./validate')
+
+const dynamicWrapper = (originalBody = {}) => {
+  const model = originalBody.model.find(a => validateWrapper({ ...originalBody, model: a }).valid)
+
+  if (!model) {
+    console.warn('openai-tokens[dynamic]: No valid model available. Either add larger models, adjust options, wrap it with the `truncateWrapper`, or reduce prompt sizes.')
+  }
+
+  const { opts, ...body } = originalBody
+  const results = {
+    ...body,
+    model: model || body.model[body.model.length - 1] // fall back to the largest model provided
+  }
+
+  return opts?.stringify ? JSON.stringify(results) : results
+}
+
+module.exports.dynamicWrapper = dynamicWrapper
diff --git a/src/dynamic.test.js b/src/dynamic.test.js
new file mode 100644
index 0000000..4723f92
--- /dev/null
+++ b/src/dynamic.test.js
@@ -0,0 +1,50 @@
+const { dynamicWrapper } = require('.')
+const ten = 'this is 10 tokens long for reference? '
+
+const defaultRequest = {
+  model: ['gpt-3.5-turbo', 'gpt-3.5-turbo-16k']
+}
+
+describe('dynamicWrapper', () => {
+  test('picks the first one when valid', () => {
+    const result = dynamicWrapper({
+      ...defaultRequest,
+      messages: [{
+        role: 'user',
+        content: 'This works fine'
+      }]
+    })
+
+    expect(result.model).toBe(defaultRequest.model[0])
+  })
+
+  test('picks the larger model when the prompt exceeds the first one', () => {
+    const result = dynamicWrapper({
+      ...defaultRequest,
+      messages: [{
+        role: 'user',
+        content: ten.repeat(500)
+      }]
+    })
+
+    expect(result.model).toBe(defaultRequest.model[1])
+  })
+
+  test('should still stringify if we must', () => {
+    const args = {
+      ...defaultRequest,
+      messages: [{
+        role: 'user',
+        content: 'This works fine'
+      }],
+      opts: {
+        stringify: true,
+        buffer: 1000
+      }
+    }
+    const result = dynamicWrapper(args)
+
+    const { opts, model, ...body } = args
+    expect(result.length).toBe(JSON.stringify({ ...body, model: defaultRequest.model[0] }).length)
+  })
+})
diff --git a/src/index.js b/src/index.js
index 1c7f1b3..6a802fe 100644
--- a/src/index.js
+++ b/src/index.js
@@ -1,9 +1,11 @@
 const { validateMessage, validateWrapper } = require('./validate')
 const { truncateMessage, truncateWrapper } = require('./truncate')
+const { dynamicWrapper } = require('./dynamic')

 module.exports = {
   validateMessage,
   validateWrapper,
   truncateWrapper,
-  truncateMessage
+  truncateMessage,
+  dynamicWrapper
 }
diff --git a/src/models.js b/src/models.js
index c2a7047..16721c8 100644
--- a/src/models.js
+++ b/src/models.js
@@ -71,7 +71,7 @@ const defaultModel = 'gpt-3.5-turbo'
 const getModel = (model = '') => {
   const lookup = models[model]
   if (!lookup) {
-    console.warn(`The model "${model}" is not currently supported. Defaulting to "${defaultModel}"`)
+    console.warn(`openai-tokens: The model "${model}" is not currently supported. Defaulting to "${defaultModel}"`)
     return models[defaultModel]
   }
diff --git a/src/truncate.js b/src/truncate.js
index 0ce8353..ba0c7aa 100644
--- a/src/truncate.js
+++ b/src/truncate.js
@@ -53,7 +53,7 @@ const limitMessages = (messages, limit) => {
     return limitMessages(messages, limit)
   }

-  console.warn('Unable to truncate any further. Prompts too large. Returning unresolvable.')
+  console.warn('openai-tokens[truncate]: Unable to truncate any further. Prompts too large. Returning unresolvable.')
   return messages
 }