@@ -72,10 +72,11 @@ def __init__(self, test_split, name: str = "", model=None, tokenizer=None, devic
7272 self .activation_cache .remove_hooks ()
7373
def get_chunk_path(self, chunk_idx: int) -> str:
    """Return the absolute path of the pickle file for chunk *chunk_idx*.

    The path is anchored at this source file's directory so it does not
    depend on the process's current working directory.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(base_dir, self.activations_dir, f"chunk_{chunk_idx}.pkl")
7676
def save_chunk(self, chunk_data: List[Tuple[List[t.Tensor], int]], chunk_idx: int):
    """Pickle *chunk_data* to disk as chunk number *chunk_idx*.

    Ensures the activations directory exists first; like the rest of this
    class, the directory is resolved relative to this source file rather
    than the current working directory.
    """
    full_dir_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), self.activations_dir
    )
    os.makedirs(full_dir_path, exist_ok=True)
    with open(self.get_chunk_path(chunk_idx), 'wb') as f:
        pickle.dump(chunk_data, f)
@@ -155,9 +156,9 @@ def get_train_data_stats(self, chunk_idx: int = 0) -> dict:
155156class TruthfulQADataset (ActivationDataset ):
156157 def __init__ (self , config : Dict [str , Any ]= None , model = None , tokenizer = None , device = None , test_split = None , ** kwargs ):
157158 super ().__init__ (test_split , "TruthfulQA" , model , tokenizer , device , config ["activation_size" ])
158- self .data_path : str = './data/TruthfulQA/TruthfulQA.csv'
159+ self .data_path : str = os . path . join ( os . path . dirname ( os . path . abspath ( __file__ )), './data/TruthfulQA/TruthfulQA.csv' )
159160 self .tqa_df = pd .read_csv (self .data_path )
160- self .activations_dir : str = f'./ data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
161+ self .activations_dir : str = f'data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
161162 self .num_total_chunks = 1 # TruthfulQA uses single chunk
162163 self .format = eval (config ["short_name" ] + "_format" )
163164
@@ -203,9 +204,9 @@ def populate_dataset(self, force_redo: bool = False, num_tokens: int = 5, max_ro
203204class DishonestQADataset (ActivationDataset ):
204205 def __init__ (self , config : Dict [str , Any ]= None , model = None , tokenizer = None , device = None , test_split = None , ** kwargs ):
205206 super ().__init__ (test_split , "DishonestQA" , model , tokenizer , device , config ["activation_size" ])
206- self .data_path : str = './data/TruthfulQA/TruthfulQA.csv'
207+ self .data_path : str = os . path . join ( os . path . dirname ( os . path . abspath ( __file__ )), './data/TruthfulQA/TruthfulQA.csv' )
207208 self .tqa_df = pd .read_csv (self .data_path )
208- self .activations_dir : str = f'./ data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
209+ self .activations_dir : str = f'data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
209210 self .num_total_chunks = 1 # DishonestQA uses single chunk
210211 self .format = eval (config ["short_name" ] + "_format" )
211212
@@ -272,11 +273,12 @@ def __init__(
272273 ):
273274 super ().__init__ (test_split , "AmongUs" , model , tokenizer , device , config ["activation_size" ])
274275 self .name : str = "AmongUs"
275- self .agent_logs_path : str = os .path .join (raw_path , expt_name + "/agent-logs-compact.json" )
276- sys .path .append (".." )
276+ base_dir = os .path .dirname (os .path .abspath (__file__ ))
277+ self .agent_logs_path : str = os .path .join (base_dir , raw_path , expt_name + "/agent-logs-compact.json" )
278+ sys .path .append (os .path .join (base_dir , ".." ))
277279 from utils import load_agent_logs_df
278280 self .agent_logs_df = load_agent_logs_df (self .agent_logs_path )
279- self .activations_dir : str = f'./ data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
281+ self .activations_dir : str = f'data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
280282 # load number of chunks from existing directory
281283 self .num_total_chunks = 0
282284 self .format = eval (config ["short_name" ] + "_format" )
@@ -335,11 +337,14 @@ def populate_dataset(
335337 print (f"Loaded { self .num_total_chunks } existing chunks" )
336338 return
337339
338- if force_redo and os .path .exists (self .activations_dir ):
339- import shutil
340- shutil .rmtree (self .activations_dir )
340+ if force_redo :
341+ full_dir_path = os .path .join (os .path .dirname (os .path .abspath (__file__ )), self .activations_dir )
342+ if os .path .exists (full_dir_path ):
343+ import shutil
344+ shutil .rmtree (full_dir_path )
341345
342- os .makedirs (self .activations_dir , exist_ok = True )
346+ full_dir_path = os .path .join (os .path .dirname (os .path .abspath (__file__ )), self .activations_dir )
347+ os .makedirs (full_dir_path , exist_ok = True )
343348
344349 # Find last processed chunk
345350 chunk_idx = 0
@@ -383,9 +388,9 @@ def populate_dataset(
383388class RolePlayingDataset (ActivationDataset ):
384389 def __init__ (self , config : Dict [str , Any ]= None , model = None , tokenizer = None , device = None , test_split = None , ** kwargs ):
385390 super ().__init__ (test_split , "Roleplaying" , model , tokenizer , device , config ["activation_size" ])
386- self .data_path : str = './data/Roleplaying/phi4_rollouts.csv'
391+ self .data_path : str = os . path . join ( os . path . dirname ( os . path . abspath ( __file__ )), './data/Roleplaying/phi4_rollouts.csv' )
387392 self .df = pd .read_csv (self .data_path )
388- self .activations_dir : str = f'./ data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
393+ self .activations_dir : str = f'data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
389394 self .num_total_chunks = 1 # Roleplaying uses single chunk
390395 self .format = eval (config ["short_name" ] + "_format" )
391396
@@ -424,9 +429,9 @@ def populate_dataset(self, force_redo: bool = False, num_tokens: int = 5, max_ro
424429class RepEngDataset (ActivationDataset ):
425430 def __init__ (self , config : Dict [str , Any ]= None , model = None , tokenizer = None , device = None , test_split = None , ** kwargs ):
426431 super ().__init__ (test_split , "RepEng" , model , tokenizer , device , config ["activation_size" ])
427- self .data_path : str = './data/RepE/true_false_facts.csv'
432+ self .data_path : str = os . path . join ( os . path . dirname ( os . path . abspath ( __file__ )), './data/RepE/true_false_facts.csv' )
428433 self .df = pd .read_csv (self .data_path )
429- self .activations_dir : str = f'./ data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
434+ self .activations_dir : str = f'data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
430435 self .num_total_chunks = 1 # RepEng uses single chunk
431436 self .format = eval (config ["short_name" ] + "_format" )
432437
@@ -465,9 +470,9 @@ def populate_dataset(self, force_redo: bool = False, num_tokens: int = 5, max_ro
465470class ApolloProbeDataset (ActivationDataset ):
466471 def __init__ (self , config : Dict [str , Any ]= None , model = None , tokenizer = None , device = None , test_split = None , ** kwargs ):
467472 super ().__init__ (test_split , "ApolloProbe" , model , tokenizer , device , config ["activation_size" ])
468- self .data_path : str = './data/ApolloProbe/common_claim_true_false.csv'
473+ self .data_path : str = os . path . join ( os . path . dirname ( os . path . abspath ( __file__ )), './data/ApolloProbe/common_claim_true_false.csv' )
469474 self .df = pd .read_csv (self .data_path )
470- self .activations_dir : str = f'./ data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
475+ self .activations_dir : str = f'data/{ self .name } _{ config ["short_name" ]} _acts_{ config ["layer" ]} /'
471476 self .num_total_chunks = 1 # ApolloProbe uses single chunk
472477 self .format = eval (config ["short_name" ] + "_format" )
473478
0 commit comments