refactor: use tokenizer to accurately count tokens (#114)
Co-authored-by: hiroki osame <hiroki.osame@gmail.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import https from 'https';
|
||||
import type { CreateCompletionRequest, CreateCompletionResponse } from 'openai';
|
||||
import { encoding_for_model as encodingForModel } from '@dqbd/tiktoken';
|
||||
|
||||
const createCompletion = (
|
||||
apiKey: string,
|
||||
@@ -58,6 +59,9 @@ const deduplicateMessages = (array: string[]) => Array.from(new Set(array));
|
||||
|
||||
// Instruction text that is placed on the line before the diff when the completion prompt is built.
const promptTemplate = 'Write an insightful but concise Git commit message in a complete sentence in present tense for the following diff without prefacing it with anything:';
|
||||
|
||||
// Model identifier, kept in one constant so the tokenizer lookup and the completion request can share the same value.
const model = 'text-davinci-003';
|
||||
// Tokenizer for `model`, obtained from @dqbd/tiktoken; created once at module load and used to count prompt tokens.
// NOTE(review): tiktoken encoders are WASM-backed and expose `free()`; this one is presumably meant to live for the whole process — confirm.
const encoder = encodingForModel(model);
|
||||
|
||||
export const generateCommitMessage = async (
|
||||
apiKey: string,
|
||||
diff: string,
|
||||
@@ -65,14 +69,17 @@ export const generateCommitMessage = async (
|
||||
) => {
|
||||
const prompt = `${promptTemplate}\n${diff}`;
|
||||
|
||||
// Accounting for GPT-3's input req of 4k tokens (approx 8k chars)
|
||||
if (prompt.length > 8000) {
|
||||
/**
|
||||
* text-davinci-003 has a token limit of 4000
|
||||
* https://platform.openai.com/docs/models/overview#:~:text=to%20Sep%202021-,text%2Ddavinci%2D003,-Can%20do%20any
|
||||
*/
|
||||
if (encoder.encode(prompt).length > 4000) {
|
||||
throw new Error('The diff is too large for the OpenAI API. Try reducing the number of staged changes, or write your own commit message.');
|
||||
}
|
||||
|
||||
try {
|
||||
const completion = await createCompletion(apiKey, {
|
||||
model: 'text-davinci-003',
|
||||
model,
|
||||
prompt,
|
||||
temperature: 0.7,
|
||||
top_p: 1,
|
||||
|
||||
Reference in New Issue
Block a user