fix: tokenize with gpt-3.5-turbo model (#173)

This commit is contained in:
hiroki osame
2023-03-26 22:41:36 -04:00
committed by GitHub
parent 52b62d5a50
commit 7068024f7f
3 changed files with 6 additions and 8 deletions

View File

@@ -33,7 +33,7 @@
"*.ts": "eslint --cache" "*.ts": "eslint --cache"
}, },
"dependencies": { "dependencies": {
"@dqbd/tiktoken": "^0.4.0" "@dqbd/tiktoken": "^1.0.2"
}, },
"devDependencies": { "devDependencies": {
"@clack/prompts": "^0.6.1", "@clack/prompts": "^0.6.1",

8
pnpm-lock.yaml generated
View File

@@ -7,7 +7,7 @@ patchedDependencies:
specifiers: specifiers:
'@clack/prompts': ^0.6.1 '@clack/prompts': ^0.6.1
'@dqbd/tiktoken': ^0.4.0 '@dqbd/tiktoken': ^1.0.2
'@pvtnbr/eslint-config': ^0.33.0 '@pvtnbr/eslint-config': ^0.33.0
'@types/ini': ^1.3.31 '@types/ini': ^1.3.31
'@types/inquirer': ^9.0.3 '@types/inquirer': ^9.0.3
@@ -28,7 +28,7 @@ specifiers:
typescript: ^4.9.5 typescript: ^4.9.5
dependencies: dependencies:
'@dqbd/tiktoken': 0.4.0 '@dqbd/tiktoken': 1.0.2
devDependencies: devDependencies:
'@clack/prompts': 0.6.1_seqcoud6rtee7vmn7zfu7zbwcy '@clack/prompts': 0.6.1_seqcoud6rtee7vmn7zfu7zbwcy
@@ -92,8 +92,8 @@ packages:
- is-unicode-supported - is-unicode-supported
patched: true patched: true
/@dqbd/tiktoken/0.4.0: /@dqbd/tiktoken/1.0.2:
resolution: {integrity: sha512-iaHgmwKAOqowBFZKxelyszoeGLoNw62eOULcmyme1aA1Ymr3JgYl0V7jwpuUm7fksalycZajx3loFn9TRUaviw==} resolution: {integrity: sha512-AjGTBRWsMoVmVeN55NLyupyM8TNamOUBl6tj5t/leLDVup3CFGO9tVagNL1jf3GyZLkWZSTmYVbPQ/M2LEcNzw==}
dev: false dev: false
/@esbuild-kit/cjs-loader/2.4.2: /@esbuild-kit/cjs-loader/2.4.2:

View File

@@ -91,8 +91,6 @@ const deduplicateMessages = (array: string[]) => Array.from(new Set(array));
const getPrompt = (locale: string, diff: string) => `Write an insightful but concise Git commit message in a complete sentence in present tense for the following diff without prefacing it with anything, the response must be in the language ${locale}:\n${diff}`; const getPrompt = (locale: string, diff: string) => `Write an insightful but concise Git commit message in a complete sentence in present tense for the following diff without prefacing it with anything, the response must be in the language ${locale}:\n${diff}`;
const model = 'gpt-3.5-turbo'; const model = 'gpt-3.5-turbo';
// TODO: update for the new gpt-3.5 model
const encoder = encodingForModel('text-davinci-003');
export const generateCommitMessage = async ( export const generateCommitMessage = async (
apiKey: string, apiKey: string,
@@ -106,7 +104,7 @@ export const generateCommitMessage = async (
* text-davinci-003 has a token limit of 4000 * text-davinci-003 has a token limit of 4000
* https://platform.openai.com/docs/models/overview#:~:text=to%20Sep%202021-,text%2Ddavinci%2D003,-Can%20do%20any * https://platform.openai.com/docs/models/overview#:~:text=to%20Sep%202021-,text%2Ddavinci%2D003,-Can%20do%20any
*/ */
if (encoder.encode(prompt).length > 4000) { if (encodingForModel(model).encode(prompt).length > 4000) {
throw new KnownError('The diff is too large for the OpenAI API. Try reducing the number of staged changes, or write your own commit message.'); throw new KnownError('The diff is too large for the OpenAI API. Try reducing the number of staged changes, or write your own commit message.');
} }