Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
6b25006
Created debug branch
saikrishnarallabandi Oct 24, 2018
4dabe6d
Added updates to debug branch
saikrishnarallabandi Oct 24, 2018
91424db
Updated pkl files
saikrishnarallabandi Oct 24, 2018
b883aad
MMD implementation
saikrishnarallabandi Oct 25, 2018
d8cb7ff
Lightweight Implementation
saikrishnarallabandi Oct 25, 2018
e5ae9bd
Plugged in VI into caption generation module
saikrishnarallabandi Oct 25, 2018
7f338b6
captions barebones
saikrishnarallabandi Oct 25, 2018
a0aeb46
q
saikrishnarallabandi Oct 25, 2018
c3cef05
Barebones version running
saikrishnarallabandi Oct 25, 2018
240fc44
Added barebones version
saikrishnarallabandi Oct 25, 2018
fc8c8ce
Merge branch 'debug' of https://github.com/saikrishnarallabandi/compo…
saikrishnarallabandi Oct 25, 2018
7ed375b
Removed dangling files
saikrishnarallabandi Oct 25, 2018
564541b
Added VAE for captions garage
saikrishnarallabandi Oct 26, 2018
fdfce58
Added VAE for captions garage
saikrishnarallabandi Oct 26, 2018
7945a2c
Added lightweight version for MSVED
saikrishnarallabandi Oct 27, 2018
f27bb04
Added generation and expA for MSVED
saikrishnarallabandi Oct 28, 2018
0b53413
Added MARC class model
saikrishnarallabandi Oct 30, 2018
c1dec36
Added cnn encoder
saikrishnarallabandi Nov 2, 2018
aa9e81c
Added RNNLM
saikrishnarallabandi Nov 3, 2018
16b1747
Update main_rnnlm_barebones.py
saikrishnarallabandi Nov 3, 2018
015f2d1
Update main_rnnlm_barebones.py
saikrishnarallabandi Nov 3, 2018
29b4a0d
Added Model forcing for RNNLM
saikrishnarallabandi Nov 3, 2018
7efca7c
Merge branch 'debug' of https://github.com/saikrishnarallabandi/compo…
saikrishnarallabandi Nov 3, 2018
2a999ad
Removed extra files
saikrishnarallabandi Nov 3, 2018
70638a9
Added Encoder Decoder style modeling for LM
saikrishnarallabandi Nov 3, 2018
c2aacae
Added speech garages
saikrishnarallabandi Nov 4, 2018
ca67127
Added VAE baseline
saikrishnarallabandi Nov 4, 2018
2772f2d
Added RNN baseline
saikrishnarallabandi Nov 7, 2018
5cc1f79
Added VED baseline
saikrishnarallabandi Nov 7, 2018
5fcaffb
Added joint caption vqa model
saikrishnarallabandi Nov 8, 2018
1e7debe
Added barebones
saikrishnarallabandi Nov 8, 2018
40fd743
Added barebones
saikrishnarallabandi Nov 8, 2018
1adb4ee
Added predicting anwswer only from question
saikrishnarallabandi Nov 10, 2018
c0ece2b
Added image question answer baseline
saikrishnarallabandi Nov 11, 2018
5dddb8d
Update train_questionansweronly_barebones.py
saikrishnarallabandi Nov 11, 2018
6054d0d
Added RNN baseline for image+question=answer
saikrishnarallabandi Nov 11, 2018
50076a8
Merge branch 'debug' of https://github.com/saikrishnarallabandi/compo…
saikrishnarallabandi Nov 11, 2018
8f43fa2
Create train_ved_klannealing.py
saikrishnarallabandi Nov 11, 2018
9090a5c
Added RNN baseline for image+question=answer
saikrishnarallabandi Nov 12, 2018
ca53c57
Merge branch 'debug' of https://github.com/saikrishnarallabandi/compo…
saikrishnarallabandi Nov 12, 2018
dbe93d6
Added RNN baseline for image+question=answer
saikrishnarallabandi Nov 12, 2018
3ae2042
resolving merge conflicts
bhavyakarki Nov 13, 2018
a9c678f
Pytorch version of n2mn rough verison
lmorishe Nov 25, 2018
719d4a5
n2nmn pytorch
lmorishe Nov 28, 2018
96e1111
deleteing empty folder
lmorishe Nov 28, 2018
c203ab8
adding pytorch code for n2nmn
lmorishe Nov 28, 2018
6638f2f
Removing old code
lmorishe Nov 29, 2018
baf41e2
Latest code for N2NMN with captions
lmorishe Nov 29, 2018
aa18711
Latest code for garage mcb
lmorishe Dec 8, 2018
9331ce1
MCB additional files
lmorishe Dec 8, 2018
94c7e81
MCB on RNN
lmorishe Dec 8, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 0 additions & 80 deletions 01_captions/00_extract_2014captions.py

This file was deleted.

16 changes: 0 additions & 16 deletions 01_captions/utils.py

This file was deleted.

162 changes: 162 additions & 0 deletions baselines/baseline_cnnrnn/data_loader_barebones.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import nltk
import os
import torch
import sys
sys.path.append('/home/ubuntu/captions/')
import torch.utils.data as data
from vocabulary import Vocabulary
from PIL import Image
from pycocotools.coco import COCO
import numpy as np
from tqdm import tqdm
import random
import json


def collate_fn(batch):
    """Collate ``(image, caption, path, raw_caption)`` samples into a batch.

    Captions are right-padded with 0 to one slot more than the longest
    caption in the batch, so every row ends with at least one padding id.

    Args:
        batch: non-empty sequence of samples indexable at positions 0-3.
            Item 1 must be a 1-D sequence of integer word ids (list, NumPy
            array or tensor); the other items are passed through untouched.

    Returns:
        Tuple ``(images, captions, paths, raw_captions)``. ``captions`` is a
        ``torch.LongTensor`` of shape ``(len(batch), max_len + 1)``; the
        other three are plain lists in batch order.

    Raises:
        ValueError: if ``batch`` is empty.
        IndexError: if a sample has fewer than four items.
    """
    lengths = [len(sample[1]) for sample in batch]
    # One extra column beyond the longest caption (same layout as before).
    width = max(lengths) + 1

    images = [sample[0] for sample in batch]

    # Fill a pre-zeroed int64 buffer instead of padding row by row; this also
    # avoids the ``np.int`` alias, which NumPy 1.24 removed.
    captions = torch.zeros((len(batch), width), dtype=torch.long)
    for row, (sample, length) in enumerate(zip(batch, lengths)):
        captions[row, :length] = torch.as_tensor(sample[1], dtype=torch.long)

    paths = [sample[2] for sample in batch]
    raw_captions = [sample[3] for sample in batch]
    return images, captions, paths, raw_captions

def _pad(seq, max_len, pad_value=0):
    """Right-pad a 1-D sequence with ``pad_value`` up to ``max_len`` items.

    Args:
        seq: 1-D sequence accepted by ``np.pad``.
        max_len: target length; must be at least ``len(seq)``.
        pad_value: filler appended after the existing items (default 0).

    Returns:
        NumPy array of length ``max_len`` holding ``seq`` followed by padding.

    Raises:
        ValueError: if ``seq`` is longer than ``max_len``.
    """
    missing = max_len - len(seq)
    if missing < 0:
        # np.pad would also reject a negative width, but with an opaque message.
        raise ValueError(
            "sequence of length %d does not fit in max_len=%d" % (len(seq), max_len))
    return np.pad(seq, (0, missing), mode='constant', constant_values=pad_value)

def get_loader(transform, mode="train", batch_size=32, vocab_threshold=None,
               vocab_file="./vocab.pkl", start_word="<start>", end_word="<end>",
               unk_word="<unk>", vocab_from_file=True, num_workers=0,
               cocoapi_loc="/home/ubuntu/captions/",
               img_root="/home/ubuntu/data/VQA"):
    """Build a ``DataLoader`` over ``CoCoDataset`` for one split.

    Batches are drawn sequentially (no shuffling, no sampler) in every mode.

    Args:
        transform: callable the dataset applies to each loaded PIL image.
        mode: one of ``"train"``, ``"val"`` or ``"test"``.
        batch_size: samples per batch; must be 1 in test mode.
        vocab_threshold, vocab_file, start_word, end_word, unk_word,
        vocab_from_file: forwarded to the dataset, which hands them to
            ``Vocabulary``. ``vocab_file`` must already exist unless the
            vocabulary is being generated (train mode, ``vocab_from_file``
            false).
        num_workers: worker process count for the ``DataLoader``.
        cocoapi_loc: directory containing ``cocoapi/annotations``.
        img_root: directory containing the ``train2014`` / ``val2014`` /
            ``test2014`` image folders.

    Returns:
        A ``torch.utils.data.DataLoader``. Train/val batches are produced by
        ``collate_fn``; test batches use PyTorch's default collation.

    Raises:
        AssertionError: if the argument combination is not allowed for
            ``mode``.
    """
    assert mode in ["train", "val", "test"], "mode must be one of 'train', 'val' or 'test'."
    if not vocab_from_file:
        assert mode == "train", ("To generate vocab from captions file, "
                                 "must be in training mode (mode='train').")

    if mode == "train":
        if vocab_from_file:
            assert os.path.exists(vocab_file), ("vocab_file does not exist. "
                                                "Change vocab_from_file to False to create vocab_file.")
        # NOTE(review): training is pinned to a batch size of 32; the reason is
        # not visible here. The nesting of this assert could not be recovered
        # from the flattened source, so it is applied to all training runs -
        # confirm.
        assert batch_size == 32

        img_folder = os.path.join(img_root, "train2014")
        annotations_file = os.path.join(cocoapi_loc, "cocoapi/annotations/captions_train2014.json")
    elif mode == "val":
        assert os.path.exists(vocab_file), "Must first generate vocab.pkl from training data."
        assert vocab_from_file, "Change vocab_from_file to True."
        img_folder = os.path.join(img_root, "val2014")
        annotations_file = os.path.join(cocoapi_loc, "cocoapi/annotations/captions_val2014.json")
    else:
        assert batch_size == 1, "Please change batch_size to 1 if testing your model."
        assert os.path.exists(vocab_file), "Must first generate vocab.pkl from training data."
        assert vocab_from_file, "Change vocab_from_file to True."
        img_folder = os.path.join(img_root, "test2014")
        annotations_file = os.path.join(cocoapi_loc, "cocoapi/annotations/image_info_test2014.json")

    dataset = CoCoDataset(transform=transform, mode=mode, batch_size=batch_size,
                          vocab_threshold=vocab_threshold, vocab_file=vocab_file,
                          start_word=start_word, end_word=end_word, unk_word=unk_word,
                          annotations_file=annotations_file,
                          vocab_from_file=vocab_from_file, img_folder=img_folder)

    loader_kwargs = dict(dataset=dataset,
                         batch_size=dataset.batch_size,
                         shuffle=False,
                         num_workers=num_workers)
    if mode != "test":
        # Test samples are (orig_image, image, path) triples without a caption,
        # so the caption-padding collate_fn (which reads item 3) cannot be used
        # on them; leave the DataLoader's default collation in place there.
        loader_kwargs["collate_fn"] = collate_fn
    return data.DataLoader(**loader_kwargs)




class CoCoDataset(data.Dataset):
    """COCO image dataset with captions (train/val) or images only (test).

    In ``"train"`` / ``"val"`` mode each item is
    ``(image, caption, path, caption_orig)``: the transformed image, a long
    tensor of vocabulary lookups framed by the start/end words, the image
    file name and the raw caption. In any other mode each item is
    ``(orig_image, image, path)``: the untransformed RGB image as a NumPy
    array, the transformed image and the file name.
    """

    def __init__(self, transform, mode, batch_size, vocab_threshold, vocab_file, start_word,
                 end_word, unk_word, annotations_file, vocab_from_file, img_folder):
        """Load the annotation index for ``mode`` and build the vocabulary.

        Args:
            transform: callable applied to every loaded PIL image.
            mode: ``"train"`` / ``"val"`` read captions through ``COCO``;
                anything else is treated as test mode.
            batch_size: number of indices ``get_indices`` draws.
            vocab_threshold, vocab_file, start_word, end_word, unk_word,
            vocab_from_file: passed straight to ``Vocabulary``.
            annotations_file: COCO annotation JSON for the split.
            img_folder: directory holding the split's image files.
        """
        self.transform = transform
        self.mode = mode
        self.batch_size = batch_size
        self.vocab = Vocabulary(vocab_threshold, vocab_file, start_word,
                                end_word, unk_word, annotations_file, vocab_from_file)
        self.img_folder = img_folder
        if self.mode in ("train", "val"):
            self.coco = COCO(annotations_file)
            self.ids = list(self.coco.anns.keys())
            print("Obtaining caption lengths...")
            # Only the token counts are needed, so the token lists themselves
            # are not kept around.
            self.caption_lengths = [
                len(nltk.tokenize.word_tokenize(
                    str(self.coco.anns[ann_id]["caption"]).lower()))
                for ann_id in tqdm(self.ids)
            ]
        else:
            # Test mode: the annotation file only lists images.
            with open(annotations_file) as handle:
                test_info = json.load(handle)
            self.paths = [item["file_name"] for item in test_info["images"]]

    def __getitem__(self, index):
        """Return the sample at ``index`` (see the class docstring for layout)."""
        if self.mode in ("train", "val"):
            annotation = self.coco.anns[self.ids[index]]
            caption_orig = annotation["caption"]
            path = self.coco.loadImgs(annotation["image_id"])[0]["file_name"]
            image = Image.open(os.path.join(self.img_folder, path)).convert("RGB")
            image = self.transform(image)

            # Frame the tokenised caption with the start and end words.
            tokens = nltk.tokenize.word_tokenize(str(caption_orig).lower())
            word_ids = [self.vocab(self.vocab.start_word)]
            word_ids.extend(self.vocab(token) for token in tokens)
            word_ids.append(self.vocab(self.vocab.end_word))
            # Build the tensor from the values directly rather than via a
            # float32 tensor, so integer ids are never rounded.
            caption = torch.tensor(word_ids).long()

            return image, caption, path, caption_orig

        path = self.paths[index]
        full_path = os.path.join(self.img_folder, path)
        print(full_path)
        PIL_image = Image.open(full_path).convert("RGB")
        # Keep the untransformed pixels alongside the model input.
        orig_image = np.array(PIL_image)
        image = self.transform(PIL_image)
        return orig_image, image, path

    def get_indices(self):
        """Draw ``batch_size`` indices of captions that share one length.

        A caption length is picked at random (weighted by how often it
        occurs), then indices are sampled with replacement from the captions
        of that length. Only usable in train/val mode, where
        ``caption_lengths`` exists.
        """
        sel_length = np.random.choice(self.caption_lengths)
        all_indices = np.where(np.asarray(self.caption_lengths) == sel_length)[0]
        indices = list(np.random.choice(all_indices, size=self.batch_size))
        return indices

    def __len__(self):
        """Return the number of annotations (train/val) or images (test)."""
        if self.mode in ("train", "val"):
            return len(self.ids)
        return len(self.paths)


Loading