Chat Stream Integration (#52)

* rm: axios dependency

* add: stream parsing for normal style

* fix: empty string problem

* add: stream for embedded prompts

* update: version increment
Kevin Dang, 2024-04-21 14:40:30 -07:00, committed by GitHub
parent bc989580a9, commit d67106c03e
10 changed files with 140 additions and 173 deletions

View File

@@ -19,6 +19,8 @@ export default event(Events.MessageCreate, async ({ log, msgHist, tokens, ollama
     // Only respond if message mentions the bot
     if (!message.mentions.has(tokens.clientUid)) return

+    let shouldStream = false
+
     // Try to query and send embed
     try {
         const config: Configuration = await new Promise((resolve, reject) => {
@@ -45,11 +47,14 @@ export default event(Events.MessageCreate, async ({ log, msgHist, tokens, ollama
                 msgHist.capacity = config.options['modify-capacity']
             }

+            // set stream state
+            shouldStream = config.options['message-stream'] as boolean || false
+
             resolve(config)
         })
     })

-    let response: ChatResponse
+    let response: string

     // check if we can push, if not, remove oldest
     while (msgHist.size() >= msgHist.capacity) msgHist.dequeue()
@@ -62,9 +67,9 @@ export default event(Events.MessageCreate, async ({ log, msgHist, tokens, ollama
         // undefined or false, use normal, otherwise use embed
         if (config.options['message-style'])
-            response = await embedMessage(message, ollama, tokens, msgHist)
+            response = await embedMessage(message, ollama, tokens, msgHist, shouldStream)
         else
-            response = await normalMessage(message, ollama, tokens, msgHist)
+            response = await normalMessage(message, ollama, tokens, msgHist, shouldStream)

         // If something bad happened, remove user query and stop
         if (response == undefined) { msgHist.pop(); return }
@@ -75,7 +80,7 @@ export default event(Events.MessageCreate, async ({ log, msgHist, tokens, ollama
         // successful query, save it in context history
         msgHist.enqueue({
             role: 'assistant',
-            content: response.message.content
+            content: response
         })
     } catch (error: any) {
         msgHist.pop() // remove message because of failure
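
The net effect of this file's changes: the handler resolves a boolean stream preference from the guild config and threads it through to whichever message helper is chosen, and both helpers now resolve to the accumulated reply text rather than a ChatResponse. A condensed sketch of the resulting flow (names match the diff, but this is not a verbatim excerpt):

// condensed flow after this change (sketch, not a verbatim excerpt)
const shouldStream = (config.options['message-stream'] as boolean) || false

// both helpers now resolve to the reply text, so `response` is a plain string
const response: string = config.options['message-style']
    ? await embedMessage(message, ollama, tokens, msgHist, shouldStream)
    : await normalMessage(message, ollama, tokens, msgHist, shouldStream)

// context history stores the string instead of digging into a ChatResponse
msgHist.enqueue({ role: 'assistant', content: response })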

View File

@@ -20,6 +20,18 @@ export type Tokens = {
     clientUid: string
 }

+/**
+ * Parameters to run the chat query
+ * @param model the model to run
+ * @param ollama the Ollama API client
+ * @param msgHist message history
+ */
+export type ChatParams = {
+    model: string,
+    ollama: Ollama,
+    msgHist: UserMessage[]
+}
+
 /**
  * Format for the messages to be stored when communicating with the bot
  * @param role either assistant, user, or system
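
A ChatParams value simply bundles what the handlers already hold. A minimal construction sketch, where the host URL, model name, and history contents are placeholders rather than repo values:

import { Ollama } from 'ollama'
import { ChatParams, UserMessage } from './index.js'

// placeholders: host, model, and history are illustrative only
const ollama = new Ollama({ host: 'http://127.0.0.1:11434' })
const history: UserMessage[] = [{ role: 'user', content: 'Hello there!' }]

const params: ChatParams = {
    model: 'llama3',
    ollama: ollama,
    msgHist: history
}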

View File

@@ -3,4 +3,5 @@ export * from './env.js'
 export * from './events.js'
 export * from './messageEmbed.js'
 export * from './messageNormal.js'
-export * from './commands.js'
+export * from './commands.js'
+export * from './streamHandler.js'

View File

@@ -1,6 +1,6 @@
 import { EmbedBuilder, Message } from 'discord.js'
 import { ChatResponse, Ollama } from 'ollama'
-import { UserMessage } from './events.js'
+import { ChatParams, UserMessage, streamResponse, blockResponse } from './index.js'
 import { Queue } from '../queues/queue.js'

 /**
@@ -16,10 +16,12 @@ export async function embedMessage(
         channel: string,
         model: string
     },
-    msgHist: Queue<UserMessage>
-) {
+    msgHist: Queue<UserMessage>,
+    stream: boolean
+): Promise<string> {
     // bot response
-    let response: ChatResponse
+    let response: ChatResponse | AsyncGenerator<ChatResponse, any, unknown>
+    let result: string = ''

     // initial message to client
     const botMessage = new EmbedBuilder()
@@ -30,28 +32,43 @@ export async function embedMessage(
     // send the message
     const sentMessage = await message.channel.send({ embeds: [botMessage] })

-    try {
-        // Attempt to query model for message
-        response = await ollama.chat({
-            model: tokens.model,
-            messages: msgHist.getItems(),
-            options: {
-                num_thread: 8, // remove if optimization needed further
-                mirostat: 1,
-                mirostat_tau: 2.0,
-                top_k: 70
-            },
-            stream: false
-        })
+    // create params
+    const params: ChatParams = {
+        model: tokens.model,
+        ollama: ollama,
+        msgHist: msgHist.getItems()
+    }

-        // dummy message to let user know that query is underway
-        const newEmbed = new EmbedBuilder()
-            .setTitle(`Responding to ${message.author.tag}`)
-            .setDescription(response.message.content || 'No Content to Provide...')
-            .setColor('#00FF00')
+    try {
+        // check if embed needs to stream
+        if (stream) {
+            response = await streamResponse(params)
+            for await (const portion of response) {
+                result += portion.message.content

-        // edit the message
-        sentMessage.edit({ embeds: [newEmbed] })
+                // new embed per token...
+                const newEmbed = new EmbedBuilder()
+                    .setTitle(`Responding to ${message.author.tag}`)
+                    .setDescription(result || 'No Content Yet...')
+                    .setColor('#00FF00')
+
+                // edit the message
+                sentMessage.edit({ embeds: [newEmbed] })
+            }
+        } else {
+            response = await blockResponse(params)
+            result = response.message.content
+
+            // only need to create 1 embed again
+            const newEmbed = new EmbedBuilder()
+                .setTitle(`Responding to ${message.author.tag}`)
+                .setDescription(result || 'No Content to Provide...')
+                .setColor('#00FF00')
+
+            // edit the message
+            sentMessage.edit({ embeds: [newEmbed] })
+        }
     } catch(error: any) {
         console.log(`[Util: messageEmbed] Error creating message: ${error.message}`)
         const errorEmbed = new EmbedBuilder()
@@ -64,5 +81,5 @@ export async function embedMessage(
     }

     // Hope there is a response! undefined otherwise
-    return response!!
+    return result
 }
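
One caveat with the streaming branch above: editing the embed once per generated token runs straight into Discord's API rate limits. A common mitigation, not part of this commit, is to throttle edits to an interval and flush once after the loop. A sketch under that assumption, where the 1500 ms interval and the `buildEmbed` helper are hypothetical:

import { Message } from 'discord.js'
import { ChatParams, streamResponse } from './index.js'

// hypothetical helper: throttled streaming edits (interval and buildEmbed are assumptions)
async function streamToEmbed(params: ChatParams, sentMessage: Message, intervalMs: number = 1500): Promise<string> {
    let result = ''
    let lastEdit = 0
    for await (const portion of await streamResponse(params)) {
        result += portion.message.content
        if (Date.now() - lastEdit > intervalMs) {
            lastEdit = Date.now()
            await sentMessage.edit({ embeds: [buildEmbed(result)] }) // buildEmbed: assumed EmbedBuilder wrapper
        }
    }
    // final flush so trailing tokens are never dropped
    await sentMessage.edit({ embeds: [buildEmbed(result)] })
    return result
}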

View File

@@ -1,6 +1,6 @@
 import { Message } from 'discord.js'
 import { ChatResponse, Ollama } from 'ollama'
-import { UserMessage } from './events.js'
+import { ChatParams, UserMessage, streamResponse, blockResponse } from './index.js'
 import { Queue } from '../queues/queue.js'

 /**
@@ -16,38 +16,49 @@ export async function normalMessage(
         channel: string,
         model: string
     },
-    msgHist: Queue<UserMessage>
-) {
+    msgHist: Queue<UserMessage>,
+    stream: boolean
+): Promise<string> {
     // bot's response
-    let response: ChatResponse
+    let response: ChatResponse | AsyncGenerator<ChatResponse, any, unknown>
+    let result: string = ''

     await message.channel.send('Generating Response . . .').then(async sentMessage => {
         try {
-            // Attempt to query model for message
-            response = await ollama.chat({
+            const params: ChatParams = {
                 model: tokens.model,
-                messages: msgHist.getItems(),
-                options: {
-                    num_thread: 8, // remove if optimization needed further
-                    mirostat: 1,
-                    mirostat_tau: 2.0,
-                    top_k: 70
-                },
-                stream: false
-            })
+                ollama: ollama,
+                msgHist: msgHist.getItems()
+            }

-            // check if message length > discord max for normal messages
-            if (response.message.content.length > 2000) {
-                sentMessage.edit(response.message.content.slice(0, 2000))
-                message.channel.send(response.message.content.slice(2000))
-            } else // edit the 'generic' response to new message
-                sentMessage.edit(response.message.content)
+            // run query based on stream preference, true = stream, false = block
+            if (stream) {
+                response = await streamResponse(params)
+                for await (const portion of response) {
+                    // append token to message
+                    result += portion.message.content
+
+                    // resend current output, THIS WILL BE SLOW due to discord limits!
+                    sentMessage.edit(result || 'No Content Yet...')
+                }
+            }
+            else {
+                response = await blockResponse(params)
+                result = response.message.content
+
+                // check if message length > discord max for normal messages
+                if (result.length > 2000) {
+                    sentMessage.edit(result.slice(0, 2000))
+                    message.channel.send(result.slice(2000))
+                } else // edit the 'generic' response to new message
+                    sentMessage.edit(result)
+            }
         } catch(error: any) {
             console.log(`[Util: messageNormal] Error creating message: ${error.message}`)
             sentMessage.edit(`**Response generation failed.**\n\nReason: ${error.message}`)
         }
     })

-    // Hope there is a response, force client to believe
-    return response!!
-}
+    // return the string representation of response
+    return result
+}
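
Note that the block branch above only handles a single overflow message, so replies longer than 4000 characters would still be cut off at Discord's 2000-character-per-message limit. A generic splitter, sketched as a possible extension rather than anything in this commit:

// assumed extension: split arbitrarily long replies into Discord-sized chunks
function chunkContent(content: string, size: number = 2000): string[] {
    const chunks: string[] = []
    for (let i = 0; i < content.length; i += size)
        chunks.push(content.slice(i, i + size))
    return chunks
}

// usage: the first chunk replaces the placeholder, the rest follow as new messages
const [first, ...rest] = chunkContent(result)
await sentMessage.edit(first)
for (const chunk of rest) await message.channel.send(chunk)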

View File

@@ -0,0 +1,40 @@
+import { ChatResponse } from "ollama"
+import { ChatParams } from "./index.js"
+
+/**
+ * Method to query the Ollama client for async generation
+ * @param params parameters to query the client
+ * @returns AsyncGenerator yielding ChatResponse chunks as they stream in
+ */
+export async function streamResponse(params: ChatParams): Promise<AsyncGenerator<ChatResponse, any, unknown>> {
+    return await params.ollama.chat({
+        model: params.model,
+        messages: params.msgHist,
+        options: {
+            num_thread: 8, // remove if optimization needed further
+            mirostat: 1,
+            mirostat_tau: 2.0,
+            top_k: 70
+        },
+        stream: true
+    })
+}
+
+/**
+ * Method to query the Ollama client for a block response
+ * @param params parameters to query the client
+ * @returns ChatResponse generated by the Ollama client
+ */
+export async function blockResponse(params: ChatParams): Promise<ChatResponse> {
+    return await params.ollama.chat({
+        model: params.model,
+        messages: params.msgHist,
+        options: {
+            num_thread: 8, // remove if optimization needed further
+            mirostat: 1,
+            mirostat_tau: 2.0,
+            top_k: 70
+        },
+        stream: false
+    })
+}
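
The two helpers share the same options and differ only in the `stream` flag, which is what flips ollama-js between returning one complete ChatResponse and an async generator of partial ones. Call-site usage for reference, with the host, model, and prompt as placeholders:

import { Ollama } from 'ollama'
import { ChatParams, streamResponse, blockResponse } from './index.js'

// placeholders: host, model, and prompt are illustrative only
const ollama = new Ollama({ host: 'http://127.0.0.1:11434' })
const params: ChatParams = {
    model: 'llama3',
    ollama: ollama,
    msgHist: [{ role: 'user', content: 'Why is the sky blue?' }]
}

// streaming: partial responses arrive as they are generated
for await (const chunk of await streamResponse(params)) {
    process.stdout.write(chunk.message.content)
}

// blocking: one complete response at the end
const full = await blockResponse(params)
console.log(full.message.content)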

View File

@@ -1,27 +0,0 @@
-import { AxiosResponse } from 'axios'
-
-/**
- * When running a /api/chat stream, the output needs to be parsed into an array of objects.
- * This method is used for development purposes and testing.
- *
- * This will not work as intended with the inclusion of ollama-js; it would need to be modified to work with it.
- *
- * @param stream Axios response from Ollama
- */
-export function parseStream(stream: AxiosResponse<any, any>) {
-    // split string by newline
-    const keywordObjects: string[] = stream.data.trim().split('\n')
-
-    // parse string and load them into objects
-    const keywordsArray: {
-        model: string,
-        created_at: string,
-        message: {
-            role: string,
-            content: string
-        },
-        done: boolean
-    }[] = keywordObjects.map((keywordString) => JSON.parse(keywordString))
-
-    return keywordsArray
-}
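
This deletion is the other half of dropping axios: ollama-js parses the newline-delimited JSON from /api/chat itself when `stream: true`, so the manual split-and-JSON.parse above is no longer needed. For comparison, a raw-fetch sketch of what this helper was compensating for (endpoint, model, and prompt are illustrative):

// raw NDJSON handling that ollama-js now does internally (sketch for comparison)
const res = await fetch('http://127.0.0.1:11434/api/chat', {
    method: 'POST',
    body: JSON.stringify({
        model: 'llama3',
        messages: [{ role: 'user', content: 'hi' }],
        stream: true
    })
})

// buffer the whole body, then parse it line by line, as the deleted helper did
const text = await res.text()
const parts = text.trim().split('\n').map(line => JSON.parse(line))
for (const part of parts) process.stdout.write(part.message.content)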