@@ -54,7 +54,7 @@ func (h *InferenceHandler) HandleChatCompletion(ctx context.Context, req ChatCom
 	inferenceReq := &inference.InferenceRequest{
 		Model:       req.Model,
 		MaxTokens:   int32(req.MaxTokens),
-		Temperature: req.Temperature,
+		Temperature: float32(req.Temperature),
 		TopP:        0.9,
 		Stream:      req.Stream,
 		UserId:      userID,
@@ -86,7 +86,7 @@ func (h *InferenceHandler) HandleStreamingChatCompletion(ctx context.Context, re
 	inferenceReq := &inference.InferenceRequest{
 		Model:       req.Model,
 		MaxTokens:   int32(req.MaxTokens),
-		Temperature: req.Temperature,
+		Temperature: float32(req.Temperature),
 		TopP:        0.9,
 		Stream:      true,
 		UserId:      userID,
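
Both hunks apply the same fix: the handler's temperature value is converted explicitly before being assigned to the inference request. A minimal sketch of why the compiler requires this, assuming `ChatCompletionRequest` decodes the JSON body into a `float64` field while the generated `InferenceRequest` declares `Temperature` as `float32` (both type definitions are assumptions; neither appears in this diff):

```go
package main

import "fmt"

// ChatCompletionRequest stands in for the handler's request type;
// encoding/json unmarshals JSON numbers into float64 by default.
type ChatCompletionRequest struct {
	Temperature float64 `json:"temperature"`
}

// InferenceRequest stands in for the generated proto message, whose
// temperature field is assumed to be declared `float` (float32 in Go).
type InferenceRequest struct {
	Temperature float32
}

func main() {
	req := ChatCompletionRequest{Temperature: 0.7}
	// Without the explicit conversion this assignment does not compile:
	// "cannot use req.Temperature (variable of type float64) as float32 value".
	inf := InferenceRequest{Temperature: float32(req.Temperature)}
	fmt.Println(inf.Temperature)
}
```

Go never converts between numeric types implicitly, so the cast has to appear at every assignment site, which is why the identical change shows up in both handlers.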
@@ -183,17 +183,16 @@ func (h *InferenceHandler) HandleStreamingChatCompletion(ctx context.Context, re
 }
 
 // convertMessages converts OpenAI format messages to inference format
-func convertMessages(messages []ChatMessage) []*inference.ChatMessage {
-	inferenceMessages := make([]*inference.ChatMessage, len(messages))
+func convertMessages(messages []ChatMessage) []*inference.Message {
+	inferenceMessages := make([]*inference.Message, len(messages))
 	for i, msg := range messages {
-		inferenceMessages[i] = &inference.ChatMessage{
+		inferenceMessages[i] = &inference.Message{
 			Role:    msg.Role,
 			Content: msg.Content,
 		}
 	}
 	return inferenceMessages
 }
-
 // convertToOpenAIFormat converts inference response to OpenAI format
 func (h *InferenceHandler) convertToOpenAIFormat(resp *inference.InferenceResponse, req ChatCompletionRequest) *ChatCompletionResponse {
 	return &ChatCompletionResponse{
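
For reference, the renamed conversion in its post-change state as a self-contained sketch. `ChatMessage` and `Message` below are stand-ins for the handler's request type and the generated `inference.Message`; the real definitions are not part of this diff, so their exact fields (`Role`, `Content`) are an assumption taken from the hunk above:

```go
package main

import "fmt"

// ChatMessage mirrors the OpenAI-style message in the HTTP request body.
type ChatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// Message stands in for the generated inference.Message type that
// replaced inference.ChatMessage in this commit.
type Message struct {
	Role    string
	Content string
}

// convertMessages copies each OpenAI-format message into the inference format.
func convertMessages(messages []ChatMessage) []*Message {
	inferenceMessages := make([]*Message, len(messages))
	for i, msg := range messages {
		inferenceMessages[i] = &Message{
			Role:    msg.Role,
			Content: msg.Content,
		}
	}
	return inferenceMessages
}

func main() {
	msgs := convertMessages([]ChatMessage{
		{Role: "system", Content: "You are a helpful assistant."},
		{Role: "user", Content: "Hello"},
	})
	fmt.Println(msgs[0].Role, msgs[1].Content) // system Hello
}
```

Since only the type name changed and the field set stayed the same, the loop body needed no changes beyond the composite-literal type.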