configs/cepo_config.yaml (4 changes: 2 additions & 2 deletions)

@@ -6,8 +6,8 @@ planning_n: 3
 planning_m: 6
 planning_temperature_step1: 0.55
 planning_temperature_step2: 0.25
-planning_temperature_step3: 0.1
-planning_temperature_step4: 0
+planning_temperature_step3: 0.10
+planning_temperature_step4: 0.01
 planning_max_tokens_step1: 4096
 planning_max_tokens_step2: 4096
 planning_max_tokens_step3: 4096
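
Of the two value changes above, only the second alters behavior: YAML parses 0.1 and 0.10 to the same float, while planning_temperature_step4 moves from 0 (parsed as an int) to the float 0.01. A minimal sketch, assuming the script runs from the repo root, of checking what yaml.safe_load produces:

import yaml

# Assumed repo-relative path, matching the file changed in this PR.
with open("configs/cepo_config.yaml") as f:
    cfg = yaml.safe_load(f)

assert cfg["planning_temperature_step3"] == 0.1               # 0.10 == 0.1, cosmetic
assert isinstance(cfg["planning_temperature_step4"], float)   # 0.01, no longer int 0
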
optillm.py (25 changes: 15 additions & 10 deletions)

@@ -4,7 +4,7 @@
 import os
 import secrets
 from flask import Flask, request, jsonify
-from cerebras.cloud.sdk import Cerebras
+# from cerebras.cloud.sdk import Cerebras
 from openai import AzureOpenAI, OpenAI
 from flask import Response
 import json
@@ -55,13 +55,13 @@ def get_config():
         API_KEY = os.environ.get("OPTILLM_API_KEY")
         default_client = create_inference_client()
     # Cerebras, OpenAI, Azure, or LiteLLM API configuration
-    elif os.environ.get("CEREBRAS_API_KEY"):
-        API_KEY = os.environ.get("CEREBRAS_API_KEY")
-        base_url = server_config['base_url']
-        if base_url != "":
-            default_client = Cerebras(api_key=API_KEY, base_url=base_url)
-        else:
-            default_client = Cerebras(api_key=API_KEY)
+    # elif os.environ.get("CEREBRAS_API_KEY"):
+    #     API_KEY = os.environ.get("CEREBRAS_API_KEY")
+    #     base_url = server_config['base_url']
+    #     if base_url != "":
+    #         default_client = Cerebras(api_key=API_KEY, base_url=base_url)
+    #     else:
+    #         default_client = Cerebras(api_key=API_KEY)
     elif os.environ.get("OPENAI_API_KEY"):
         API_KEY = os.environ.get("OPENAI_API_KEY")
         base_url = server_config['base_url']
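
The hunk above disables the dedicated Cerebras client, so requests now fall through to the remaining OpenAI, Azure, or LiteLLM branches. If Cerebras access is still wanted, one possible workaround is to point the OpenAI client at an OpenAI-compatible Cerebras endpoint; the URL below is an assumption, not something this PR configures:

import os
from openai import OpenAI

# Sketch only: reuse the key the removed branch read, against an assumed
# OpenAI-compatible Cerebras endpoint.
client = OpenAI(
    api_key=os.environ["CEREBRAS_API_KEY"],
    base_url="https://api.cerebras.ai/v1",  # assumed endpoint URL
)
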
@@ -231,10 +231,12 @@ def parse_combined_approach(model: str, known_approaches: list, plugin_approache
             approaches.append(part)
         elif '&' in part:
             operation = 'AND'
-            approaches.extend(part.split('&'))
+            for approach in part.split('&'):
+                approaches.append(approach.strip())
         elif '|' in part:
             operation = 'OR'
-            approaches.extend(part.split('|'))
+            for approach in part.split('|'):
+                approaches.append(approach.strip())
         else:
             parsing_approaches = False
             model_parts.append(part)
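
Functionally, the hunk above only changes whitespace handling: tokens split on '&' or '|' are now stripped before being collected, so "moa & mcts" resolves the same as "moa&mcts". A standalone sketch of the new tokenization (split_approaches and the approach names are illustrative, not project code):

def split_approaches(part: str) -> list[str]:
    # Mirrors the new loops: split on the operator, then strip each token.
    sep = '&' if '&' in part else '|'
    return [approach.strip() for approach in part.split(sep)]

assert split_approaches("moa&mcts") == ["moa", "mcts"]
assert split_approaches("moa & mcts") == ["moa", "mcts"]  # previously ['moa ', ' mcts']
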
@@ -518,6 +520,7 @@ def proxy():
     n = data.get('n', server_config['n']) # Get n value from request or config
 
     optillm_approach = data.get('optillm_approach', server_config['approach'])
+    # print(f'BVE - I think that the approach is {optillm_approach}')
     logger.debug(data)
     server_config['mcts_depth'] = data.get('mcts_depth', server_config['mcts_depth'])
     server_config['mcts_exploration'] = data.get('mcts_exploration', server_config['mcts_exploration'])
@@ -535,6 +538,7 @@ def proxy():
     default_client, api_key = get_config()
 
     operation, approaches, model = parse_combined_approach(model, known_approaches, plugin_approaches)
+    # print(f'BVE Checking for the combined approach {operation} and {approaches} with models {model}')
     logger.info(f'Using approach(es) {approaches}, operation {operation}, with model {model}')
 
     if bearer_token != "" and bearer_token.startswith("sk-"):
@@ -594,6 +598,7 @@ def proxy():
     if isinstance(messages, list) and messages: # Only process if format changed
         response = messages[-1]['content']
 
+    print(f'I think that we have stream {stream}')
     if stream:
         return Response(generate_streaming_response(response, model), content_type='text/event-stream')
     else:
optillm/cepo.py (36 changes: 18 additions & 18 deletions)

@@ -1,31 +1,31 @@
 # Apache license 2 - added after the fork for the CePO method
 import re
-import cerebras
+# import cerebras
 import openai
 import yaml
 
 from dataclasses import dataclass
-from cerebras.cloud.sdk import BadRequestError as CerebrasBadRequestError
+# from cerebras.cloud.sdk import BadRequestError as CerebrasBadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
-from typing import Optional, Literal
+from typing import Optional, Literal, Any
 
 
 @dataclass
 class CepoConfig:
-    bestofn_n: int  # number of responses to be generated in best of n stage
-    bestofn_temperature: float  # temperature for verifier in best of n stage
-    bestofn_max_tokens: int  # maximum number of tokens for verifier in best of n stage
-    bestofn_rating_type: Literal["absolute", "pairwise"]  # type of rating in best of n stage
-    planning_n: int  # number of plans generated in planning stage
-    planning_m: int  # number of attempts to generate n plans in planning stage
-    planning_temperature_step1: float  # temperature for generator in step 1 of planning stage
-    planning_temperature_step2: float  # temperature for generator in step 2 of planning stage
-    planning_temperature_step3: float  # temperature for generator in step 3 of planning stage
-    planning_temperature_step4: float  # temperature for generator in step 4 of planning stage
-    planning_max_tokens_step1: int  # maximum number of tokens in step 1 of planning stage
-    planning_max_tokens_step2: int  # maximum number of tokens in step 2 of planning stage
-    planning_max_tokens_step3: int  # maximum number of tokens in step 3 of planning stage
-    planning_max_tokens_step4: int  # maximum number of tokens in step 4 of planning stage
+    bestofn_n: int = 3  # number of responses to be generated in best of n stage
+    bestofn_temperature: float = 0.1  # temperature for verifier in best of n stage
+    bestofn_max_tokens: int = 4096  # maximum number of tokens for verifier in best of n stage
+    bestofn_rating_type: Literal["absolute", "pairwise"] = "absolute"  # type of rating in best of n stage
+    planning_n: int = 3  # number of plans generated in planning stage
+    planning_m: int = 6  # number of attempts to generate n plans in planning stage
+    planning_temperature_step1: float = 0.55  # temperature for generator in step 1 of planning stage
+    planning_temperature_step2: float = 0.25  # temperature for generator in step 2 of planning stage
+    planning_temperature_step3: float = 0.10  # temperature for generator in step 3 of planning stage
+    planning_temperature_step4: float = 0.01  # temperature for generator in step 4 of planning stage
+    planning_max_tokens_step1: int = 4096  # maximum number of tokens in step 1 of planning stage
+    planning_max_tokens_step2: int = 4096  # maximum number of tokens in step 2 of planning stage
+    planning_max_tokens_step3: int = 4096  # maximum number of tokens in step 3 of planning stage
+    planning_max_tokens_step4: int = 4096  # maximum number of tokens in step 4 of planning stage
 
 
 # given command line arguments which includes a yaml file path, initialize a CePO configuration
@@ -183,7 +183,7 @@ def generate_completion(system_prompt: str, task: str, client: Any, model: str,
         )
         final_solution = response.choices[0].message.content
         completion_tokens += response.usage.completion_tokens
-    except (CerebrasBadRequestError, OpenAIBadRequestError) as e:
+    except (OpenAIBadRequestError) as e:
         # In case of an error, take the first plan as the final solution
         final_solution = plans[0]
         messages = []
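
With defaults on every field matching configs/cepo_config.yaml, CepoConfig can now be instantiated with no arguments and overridden field by field. A minimal usage sketch; the override filter below is an assumption about how a loader such as init_cepo_config might apply the YAML, not a copy of the project's code:

import yaml
from dataclasses import fields
from optillm.cepo import CepoConfig

config = CepoConfig()  # all defaults, newly valid with this change

# Illustrative loader: apply YAML overrides for known fields only.
with open("configs/cepo_config.yaml") as f:
    overrides = yaml.safe_load(f) or {}
known = {f.name for f in fields(CepoConfig)}
config = CepoConfig(**{k: v for k, v in overrides.items() if k in known})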