configs/cepo_config.yaml (4 changes: 2 additions & 2 deletions)

@@ -6,8 +6,8 @@ planning_n: 3
 planning_m: 6
 planning_temperature_step1: 0.55
 planning_temperature_step2: 0.25
-planning_temperature_step3: 0.1
-planning_temperature_step4: 0
+planning_temperature_step3: 0.10
+planning_temperature_step4: 0.01
 planning_max_tokens_step1: 4096
 planning_max_tokens_step2: 4096
 planning_max_tokens_step3: 4096
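
Of the two value changes above, only the second alters behavior: YAML parses 0.1 and 0.10 to the same float, while planning_temperature_step4 moves from 0 (parsed as an int) to the float 0.01. A minimal sketch, assuming the script runs from the repo root, of checking what yaml.safe_load produces:

import yaml

# Assumed repo-relative path, matching the file changed in this PR.
with open("configs/cepo_config.yaml") as f:
    cfg = yaml.safe_load(f)

assert cfg["planning_temperature_step3"] == 0.1               # 0.10 == 0.1, cosmetic
assert isinstance(cfg["planning_temperature_step4"], float)   # 0.01, no longer int 0
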
optillm.py (25 changes: 15 additions & 10 deletions)

@@ -4,7 +4,7 @@
 import os
 import secrets
 from flask import Flask, request, jsonify
-from cerebras.cloud.sdk import Cerebras
+# from cerebras.cloud.sdk import Cerebras
 from openai import AzureOpenAI, OpenAI
 from flask import Response
 import json
@@ -55,13 +55,13 @@ def get_config():
         API_KEY = os.environ.get("OPTILLM_API_KEY")
         default_client = create_inference_client()
     # Cerebras, OpenAI, Azure, or LiteLLM API configuration
-    elif os.environ.get("CEREBRAS_API_KEY"):
-        API_KEY = os.environ.get("CEREBRAS_API_KEY")
-        base_url = server_config['base_url']
-        if base_url != "":
-            default_client = Cerebras(api_key=API_KEY, base_url=base_url)
-        else:
-            default_client = Cerebras(api_key=API_KEY)
+    # elif os.environ.get("CEREBRAS_API_KEY"):
+    #     API_KEY = os.environ.get("CEREBRAS_API_KEY")
+    #     base_url = server_config['base_url']
+    #     if base_url != "":
+    #         default_client = Cerebras(api_key=API_KEY, base_url=base_url)
+    #     else:
+    #         default_client = Cerebras(api_key=API_KEY)
     elif os.environ.get("OPENAI_API_KEY"):
         API_KEY = os.environ.get("OPENAI_API_KEY")
         base_url = server_config['base_url']
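
The hunk above disables the dedicated Cerebras client, so requests now fall through to the remaining OpenAI, Azure, or LiteLLM branches. If Cerebras access is still wanted, one possible workaround is to point the OpenAI client at an OpenAI-compatible Cerebras endpoint; the URL below is an assumption, not something this PR configures:

import os
from openai import OpenAI

# Sketch only: reuse the key the removed branch read, against an assumed
# OpenAI-compatible Cerebras endpoint.
client = OpenAI(
    api_key=os.environ["CEREBRAS_API_KEY"],
    base_url="https://api.cerebras.ai/v1",  # assumed endpoint URL
)
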
@@ -231,10 +231,12 @@ def parse_combined_approach(model: str, known_approaches: list, plugin_approache
             approaches.append(part)
         elif '&' in part:
             operation = 'AND'
-            approaches.extend(part.split('&'))
+            for approach in part.split('&'):
+                approaches.append(approach.strip())
         elif '|' in part:
             operation = 'OR'
-            approaches.extend(part.split('|'))
+            for approach in part.split('|'):
+                approaches.append(approach.strip())
         else:
             parsing_approaches = False
             model_parts.append(part)
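
Functionally, the hunk above only changes whitespace handling: tokens split on '&' or '|' are now stripped before being collected, so "moa & mcts" resolves the same as "moa&mcts". A standalone sketch of the new tokenization (split_approaches and the approach names are illustrative, not project code):

def split_approaches(part: str) -> list[str]:
    # Mirrors the new loops: split on the operator, then strip each token.
    sep = '&' if '&' in part else '|'
    return [approach.strip() for approach in part.split(sep)]

assert split_approaches("moa&mcts") == ["moa", "mcts"]
assert split_approaches("moa & mcts") == ["moa", "mcts"]  # previously ['moa ', ' mcts']
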
@@ -518,6 +520,7 @@ def proxy():
     n = data.get('n', server_config['n']) # Get n value from request or config
 
     optillm_approach = data.get('optillm_approach', server_config['approach'])
+    # print(f'BVE - I think that the approach is {optillm_approach}')
     logger.debug(data)
     server_config['mcts_depth'] = data.get('mcts_depth', server_config['mcts_depth'])
     server_config['mcts_exploration'] = data.get('mcts_exploration', server_config['mcts_exploration'])
@@ -535,6 +538,7 @@ def proxy():
     default_client, api_key = get_config()
 
     operation, approaches, model = parse_combined_approach(model, known_approaches, plugin_approaches)
+    # print(f'BVE Checking for the combined approach {operation} and {approaches} with models {model}')
     logger.info(f'Using approach(es) {approaches}, operation {operation}, with model {model}')
 
     if bearer_token != "" and bearer_token.startswith("sk-"):
@@ -594,6 +598,7 @@ def proxy():
     if isinstance(messages, list) and messages: # Only process if format changed
         response = messages[-1]['content']
 
+    print(f'I think that we have stream {stream}')
     if stream:
         return Response(generate_streaming_response(response, model), content_type='text/event-stream')
     else:
optillm/cepo.py (36 changes: 18 additions & 18 deletions)

@@ -1,31 +1,31 @@
 # Apache license 2 - added after the fork for the CePO method
 import re
-import cerebras
+# import cerebras
 import openai
 import yaml
 
 from dataclasses import dataclass
-from cerebras.cloud.sdk import BadRequestError as CerebrasBadRequestError
+# from cerebras.cloud.sdk import BadRequestError as CerebrasBadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
-from typing import Optional, Literal
+from typing import Optional, Literal, Any
 
 
 @dataclass
 class CepoConfig:
-    bestofn_n: int  # number of responses to be generated in best of n stage
-    bestofn_temperature: float  # temperature for verifier in best of n stage
-    bestofn_max_tokens: int  # maximum number of tokens for verifier in best of n stage
-    bestofn_rating_type: Literal["absolute", "pairwise"]  # type of rating in best of n stage
-    planning_n: int  # number of plans generated in planning stage
-    planning_m: int  # number of attempts to generate n plans in planning stage
-    planning_temperature_step1: float  # temperature for generator in step 1 of planning stage
-    planning_temperature_step2: float  # temperature for generator in step 2 of planning stage
-    planning_temperature_step3: float  # temperature for generator in step 3 of planning stage
-    planning_temperature_step4: float  # temperature for generator in step 4 of planning stage
-    planning_max_tokens_step1: int  # maximum number of tokens in step 1 of planning stage
-    planning_max_tokens_step2: int  # maximum number of tokens in step 2 of planning stage
-    planning_max_tokens_step3: int  # maximum number of tokens in step 3 of planning stage
-    planning_max_tokens_step4: int  # maximum number of tokens in step 4 of planning stage
+    bestofn_n: int = 3  # number of responses to be generated in best of n stage
+    bestofn_temperature: float = 0.1  # temperature for verifier in best of n stage
+    bestofn_max_tokens: int = 4096  # maximum number of tokens for verifier in best of n stage
+    bestofn_rating_type: Literal["absolute", "pairwise"] = "absolute"  # type of rating in best of n stage
+    planning_n: int = 3  # number of plans generated in planning stage
+    planning_m: int = 6  # number of attempts to generate n plans in planning stage
+    planning_temperature_step1: float = 0.55  # temperature for generator in step 1 of planning stage
+    planning_temperature_step2: float = 0.25  # temperature for generator in step 2 of planning stage
+    planning_temperature_step3: float = 0.10  # temperature for generator in step 3 of planning stage
+    planning_temperature_step4: float = 0.01  # temperature for generator in step 4 of planning stage
+    planning_max_tokens_step1: int = 4096  # maximum number of tokens in step 1 of planning stage
+    planning_max_tokens_step2: int = 4096  # maximum number of tokens in step 2 of planning stage
+    planning_max_tokens_step3: int = 4096  # maximum number of tokens in step 3 of planning stage
+    planning_max_tokens_step4: int = 4096  # maximum number of tokens in step 4 of planning stage
 
 
 # given command line arguments which includes a yaml file path, initialize a CePO configuration
@@ -183,7 +183,7 @@ def generate_completion(system_prompt: str, task: str, client: Any, model: str,
         )
         final_solution = response.choices[0].message.content
         completion_tokens += response.usage.completion_tokens
-    except (CerebrasBadRequestError, OpenAIBadRequestError) as e:
+    except (OpenAIBadRequestError) as e:
         # In case of an error, take the first plan as the final solution
         final_solution = plans[0]
         messages = []
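
With defaults on every field matching configs/cepo_config.yaml, CepoConfig can now be instantiated with no arguments and overridden field by field. A minimal usage sketch; the override filter below is an assumption about how a loader such as init_cepo_config might apply the YAML, not a copy of the project's code:

import yaml
from dataclasses import fields
from optillm.cepo import CepoConfig

config = CepoConfig()  # all defaults, newly valid with this change

# Illustrative loader: apply YAML overrides for known fields only.
with open("configs/cepo_config.yaml") as f:
    overrides = yaml.safe_load(f) or {}
known = {f.name for f in fields(CepoConfig)}
config = CepoConfig(**{k: v for k, v in overrides.items() if k in known})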