12 changes: 6 additions & 6 deletions nasbench/controller.py
@@ -51,15 +51,15 @@ def flatten_parameters(self):
self.encoder.rnn.flatten_parameters()
self.decoder.rnn.flatten_parameters()

def forward(self, input_variable, target_variable=None):
encoder_outputs, encoder_hidden, arch_emb, predict_value = self.encoder(input_variable)
def forward(self, input_variable, input_len, target_variable=None):
encoder_outputs, encoder_hidden, arch_emb, predict_value = self.encoder(input_variable, input_len)
decoder_hidden = (arch_emb.unsqueeze(0), arch_emb.unsqueeze(0))
decoder_outputs, archs = self.decoder(target_variable, decoder_hidden, encoder_outputs)
decoder_outputs, archs = self.decoder(target_variable, input_len, decoder_hidden, encoder_outputs)
return predict_value, decoder_outputs, archs

def generate_new_arch(self, input_variable, predict_lambda=1, direction='-'):
def generate_new_arch(self, input_variable, input_len, predict_lambda=1, direction='-'):
encoder_outputs, encoder_hidden, arch_emb, predict_value, new_encoder_outputs, new_arch_emb, new_predict_value = self.encoder.infer(
input_variable, predict_lambda, direction=direction)
input_variable, input_len, predict_lambda, direction=direction)
new_encoder_hidden = (new_arch_emb.unsqueeze(0), new_arch_emb.unsqueeze(0))
decoder_outputs, new_archs = self.decoder(None, new_encoder_hidden, new_encoder_outputs)
decoder_outputs, new_archs = self.decoder(None, input_len, new_encoder_hidden, new_encoder_outputs)
return new_archs, new_predict_value
23 changes: 20 additions & 3 deletions nasbench/decoder.py
@@ -4,6 +4,8 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import utils
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence

SOS_ID = 0
EOS_ID = 0
@@ -73,25 +75,40 @@ def __init__(self,
for i in range(self.n):
self.offsets.append( (i + 3) * i // 2 - 1)

def forward(self, x, encoder_hidden=None, encoder_outputs=None):
def forward(self, x, x_len, encoder_hidden=None, encoder_outputs=None):


x_len feels like a bit of a misnomer. If I understood it correctly, it is roughly:
x_len = [len(x) for x in xs]
So maybe x_len_per_elem or x_len_list? Or at least a comment would be helpful!
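For reference, a minimal sketch of the padding/packing pattern this PR seems to rely on (the variable names and toy data below are illustrative, not from the PR):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

xs = [[3, 1, 4, 5], [2, 7], [6, 1, 8]]                # variable-length sequences
x_len = [len(x) for x in xs]                          # one length per batch element, e.g. [4, 2, 3]
padded = torch.zeros(len(xs), max(x_len), dtype=torch.long)
for i, x in enumerate(xs):
    padded[i, :len(x)] = torch.tensor(x)              # left-aligned, zero-padded to max length
# pack_padded_sequence expects the batch sorted by length (descending),
# or enforce_sorted=False on recent PyTorch versions.
packed = pack_padded_sequence(padded, x_len, batch_first=True, enforce_sorted=False)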

# x is decoder_inputs = [0] + encoder_inputs[:-1]


Remove this


decoder_hidden = self._init_state(encoder_hidden)
if x is not None:
bsz = x.size(0)
tgt_len = x.size(1)
x = self.embedding(x)
x = F.dropout(x, self.dropout, training=self.training)
residual = x

x = pack_padded_sequence(x, x_len, batch_first=True)
x, hidden = self.rnn(x, decoder_hidden)
x = pad_packed_sequence(x, batch_first=True)[0]

x = (residual + x) * math.sqrt(0.5)
residual = x
x, _ = self.attention(x, encoder_outputs)

# create mask
mask = torch.zeros(bsz, x.size(1))
for i,l in enumerate(x_len):
for j in range(l):
mask[i][j] = 1


What about

for i, l in enumerate(x_len):
    mask[i, :l] = 1

?
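Or, going one step further, the whole mask could be built without any Python loop. A sketch only, assuming x_len is a plain list of ints and x is the padded batch-first tensor from the code above:

lengths = torch.tensor(x_len)                          # shape (bsz,)
positions = torch.arange(x.size(1)).unsqueeze(0)       # shape (1, padded_len)
mask = (positions < lengths.unsqueeze(1)).float()      # 1.0 inside each sequence, 0.0 on padding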

mask = (mask == 0).unsqueeze(1)
mask = utils.move_to_cuda(mask)

x, _ = self.attention(x, encoder_outputs, mask=mask)
x = (residual + x) * math.sqrt(0.5)
predicted_softmax = F.log_softmax(self.out(x.view(-1, self.hidden_size)), dim=-1)
predicted_softmax = predicted_softmax.view(bsz, tgt_len, -1)
return predicted_softmax, None


Does predicted_softmax return sane values here? If the padded positions are only zero-initialized, the probability values at those positions will be broken.
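One way this concern could be addressed (a sketch only, not what the PR currently does): give the padded target positions a dedicated index and pass it to nll_loss as ignore_index, so whatever the decoder emits at padded steps never enters the loss. PAD_ID and the dummy shapes below are assumptions for illustration:

import torch
import torch.nn.functional as F

PAD_ID = -100                                          # hypothetical padding index for targets
log_prob = torch.randn(4, 6, 10).log_softmax(-1)       # dummy (batch, padded_len, vocab) log-probs
decoder_target = torch.randint(0, 10, (4, 6))
decoder_target[:, 4:] = PAD_ID                         # pretend the last two steps are padding
loss = F.nll_loss(log_prob.view(-1, log_prob.size(-1)),
                  decoder_target.view(-1),
                  ignore_index=PAD_ID)                  # padded steps contribute nothing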



# inference
# inference : not using xlen. pad packed.
assert x is None
bsz = encoder_hidden[0].size(1)
length = self.length
12 changes: 9 additions & 3 deletions nasbench/encoder.py
@@ -4,9 +4,11 @@

import logging
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence


class Encoder(nn.Module):
@@ -53,11 +55,15 @@ def forward_predictor(self, x):
predict_value = torch.sigmoid(x)
return predict_value

def forward(self, x):
def forward(self, x, x_len):
x = self.embedding(x)
x = F.dropout(x, self.dropout, training=self.training)
residual = x

x = pack_padded_sequence(x, x_len, batch_first=True)
x, hidden = self.rnn(x)
x = pad_packed_sequence(x, batch_first=True)[0]

x = self.out_proj(x)
x = residual + x
x = F.normalize(x, 2, dim=-1)
@@ -79,8 +85,8 @@ def forward(self, x):
predict_value = torch.sigmoid(x)
return encoder_outputs, encoder_hidden, arch_emb, predict_value

def infer(self, x, predict_lambda, direction='-'):
encoder_outputs, encoder_hidden, arch_emb, predict_value = self(x)
def infer(self, x, x_len, predict_lambda, direction='-'):
encoder_outputs, encoder_hidden, arch_emb, predict_value = self(x, x_len)
grads_on_outputs = torch.autograd.grad(predict_value, encoder_outputs, torch.ones_like(predict_value))[0]
if direction == '+':
new_encoder_outputs = encoder_outputs + predict_lambda * grads_on_outputs
6 changes: 2 additions & 4 deletions nasbench/runs/train_seminas.sh
@@ -2,9 +2,7 @@ cd ..
export PYTHONPATH=.:$PYTHONPATH
MODEL=seminas
OUTPUT_DIR=outputs/$MODEL

DATASET_DIR=home/dzzp/workspace/dataset/
mkdir -p $OUTPUT_DIR

python train_seminas.py \
--output_dir=$OUTPUT_DIR \
| tee $OUTPUT_DIR/log.txt
CUDA_VISIBLE_DEVICES=1 python3 train_seminas.py --data=$DATASET_DIR --output_dir=$OUTPUT_DIR | tee $OUTPUT_DIR/log.txt
62 changes: 46 additions & 16 deletions nasbench/train_seminas.py
@@ -61,13 +61,28 @@ def controller_train(train_queue, model, optimizer):
nll = utils.AvgrageMeter()
model.train()
for step, sample in enumerate(train_queue):
encoder_input = utils.move_to_cuda(sample['encoder_input'])
encoder_target = utils.move_to_cuda(sample['encoder_target'])
decoder_input = utils.move_to_cuda(sample['decoder_input'])
decoder_target = utils.move_to_cuda(sample['decoder_target'])
encoder_input_unsorted = sample['encoder_input'].long() # shape maybe (batch size, max seq length, word length)
encoder_target_unsorted = sample['encoder_target'].float()
decoder_input_unsorted = sample['decoder_input'].long()
decoder_target_unsorted = sample['decoder_target'].long()
input_len_unsorted = sample['input_len']

# sort input batch
input_len, sort_index = torch.sort(input_len_unsorted, 0, descending=True)
input_len = input_len.numpy().tolist()
encoder_input = torch.index_select(encoder_input_unsorted, 0, sort_index)
encoder_target = torch.index_select(encoder_target_unsorted, 0, sort_index)
decoder_input = torch.index_select(decoder_input_unsorted, 0, sort_index)
decoder_target = torch.index_select(decoder_target_unsorted, 0, sort_index)

# move to cuda
encoder_input = utils.move_to_cuda(encoder_input) # shape maybe (batch size, max seq length, word length)
encoder_target = utils.move_to_cuda(encoder_target)
decoder_input = utils.move_to_cuda(decoder_input)
decoder_target = utils.move_to_cuda(decoder_target)

optimizer.zero_grad()
predict_value, log_prob, arch = model(encoder_input, decoder_input)
predict_value, log_prob, arch = model(encoder_input, input_len, decoder_input)
loss_1 = F.mse_loss(predict_value.squeeze(), encoder_target.squeeze())
loss_2 = F.nll_loss(log_prob.contiguous().view(-1, log_prob.size(-1)), decoder_target.view(-1))
loss = args.trade_off * loss_1 + (1 - args.trade_off) * loss_2
@@ -88,9 +103,17 @@ def controller_infer(queue, model, step, direction='+'):
new_predict_values = []
model.eval()
for i, sample in enumerate(queue):
encoder_input = utils.move_to_cuda(sample['encoder_input'])
encoder_input_unsorted = sample['encoder_input'].long() # shape maybe (batch size, max seq length, word length)
input_len_unsorted = sample['input_len']
# sort input batch
input_len, sort_index = torch.sort(input_len_unsorted, 0, descending=True)
input_len = input_len.numpy().tolist()
encoder_input = torch.index_select(encoder_input_unsorted, 0, sort_index)
# move to gpu
encoder_input = utils.move_to_cuda(encoder_input)

model.zero_grad()
new_arch, new_predict_value = model.generate_new_arch(encoder_input, step, direction=direction)
new_arch, new_predict_value = model.generate_new_arch(encoder_input, input_len, step, direction=direction)
new_arch_list.extend(new_arch.data.squeeze().tolist())
new_predict_values.extend(new_predict_value.data.squeeze().tolist())
return new_arch_list, new_predict_values
@@ -122,8 +145,16 @@ def generate_synthetic_controller_data(nasbench, model, base_arch=None, random_a
with torch.no_grad():
model.eval()
for sample in controller_synthetic_queue:
encoder_input = sample['encoder_input'].cuda()
_, _, _, predict_value = model.encoder(encoder_input)
encoder_input_unsorted = sample['encoder_input'].long() # shape maybe (batch size, max seq length, word length)
input_len_unsorted = sample['input_len']
# sort input batch
input_len, sort_index = torch.sort(input_len_unsorted, 0, descending=True)
input_len = input_len.numpy().tolist()
encoder_input = torch.index_select(encoder_input_unsorted, 0, sort_index)
# move to gpu
encoder_input = utils.move_to_cuda(encoder_input)

_, _, _, predict_value = model.encoder(encoder_input, input_len)
random_synthetic_target += predict_value.data.squeeze().tolist()
assert len(random_synthetic_input) == len(random_synthetic_target)
synthetic_input = random_synthetic_input
@@ -148,7 +179,7 @@ def main():

args.source_length = args.encoder_length = args.decoder_length = (args.nodes + 2) * (args.nodes - 1) // 2

nasbench = api.NASBench(os.path.join(args.data, 'nasbench_full.tfrecord'))
nasbench = api.NASBench(os.path.join(args.data, 'nasbench_only108.tfrecord'))

controller = NAO(
args.encoder_layers,
@@ -174,8 +205,8 @@ def main():
logging.info('Iteration {}'.format(i+1))
if not child_arch_pool_valid_acc:
for arch in child_arch_pool:
data = nasbench.query(arch)
child_arch_pool_valid_acc.append(data['validation_accuracy'])
val_acc = nasbench.query(arch, option='valid')
child_arch_pool_valid_acc.append(val_acc)

arch_pool += child_arch_pool
arch_pool_valid_acc += child_arch_pool_valid_acc
@@ -200,8 +231,7 @@ def main():
print('Architecutre connection:{}'.format(arch_pool[arch_index].matrix))
print('Architecture operations:{}'.format(arch_pool[arch_index].ops))
print('Valid accuracy:{}'.format(arch_pool_valid_acc[arch_index]))
fs, cs = nasbench.get_metrics_from_spec(arch_pool[arch_index])
test_acc = np.mean([cs[108][j]['final_test_accuracy'] for j in range(3)])
test_acc = nasbench.query(arch_pool[arch_index], option='test')
print('Mean test accuracy:{}'.format(test_acc))
break

@@ -245,9 +275,9 @@ def main():
logging.info('Generate new architectures with step size %d', predict_step_size)
new_seq, new_perfs = controller_infer(controller_infer_queue, controller, predict_step_size, direction='+')
for seq in new_seq:
matrix, ops = utils.convert_seq_to_arch(seq)
matrix, ops = utils.convert_seq_to_arch(seq, nasbench.search_space)
arch = api.ModelSpec(matrix=matrix, ops=ops)
if nasbench.is_valid(arch) and len(arch.ops) == 7 and seq not in train_encoder_input and seq not in new_seqs:
if nasbench.is_valid(arch) and seq not in train_encoder_input and seq not in new_seqs:
new_archs.append(arch)
new_seqs.append(seq)
if len(new_seqs) >= args.new_arch:
71 changes: 37 additions & 34 deletions nasbench/utils.py
@@ -3,6 +3,7 @@
import torch
import torch.utils.data
import torch.nn.functional as F
from torch.autograd import Variable
from nasbench import api

INPUT = 'input'
@@ -48,19 +49,19 @@ def generate_arch(n, nasbench, need_perf=False):
np.random.shuffle(all_keys)
for key in all_keys:
fixed_stat, computed_stat = nasbench.get_metrics_from_hash(key)
if len(fixed_stat['module_operations']) < 7:
continue
#if len(fixed_stat['module_operations']) < 7:
# continue


Remove plz

arch = api.ModelSpec(
matrix=fixed_stat['module_adjacency'],
ops=fixed_stat['module_operations'],
)
if need_perf:
data = nasbench.query(arch)
if data['validation_accuracy'] < 0.9:
val_acc = nasbench.query(arch, option='valid')
if val_acc < 0.9:
continue
valid_accs.append(data['validation_accuracy'])
valid_accs.append(val_acc)
archs.append(arch)
seqs.append(convert_arch_to_seq(arch.matrix, arch.ops))
seqs.append(convert_arch_to_seq(arch.matrix, arch.ops, nasbench.search_space))
count += 1
if count >= n:
return archs, seqs, valid_accs
@@ -75,74 +76,76 @@ def __init__(self, inputs, targets=None, train=True, sos_id=0, eos_id=0):
super(ControllerDataset, self).__init__()
if targets is not None:
assert len(inputs) == len(targets)
self.inputs = inputs
self.inputs = inputs # list of seqs
self.len_inputs = [len(i) for i in inputs]
self.max_len = max(self.len_inputs)
self.targets = targets
self.train = train
self.sos_id = sos_id
self.eos_id = eos_id

def __getitem__(self, index):
encoder_input = self.inputs[index]
encoder_input = self.inputs[index] + [0 for _ in range(self.max_len - len(self.inputs[index]))] # fix length as max_len
len_input = self.len_inputs[index]
encoder_target = None
if self.targets is not None:
encoder_target = [self.targets[index]]
if self.train:
decoder_input = [self.sos_id] + encoder_input[:-1]
sample = {
'encoder_input': torch.LongTensor(encoder_input),
'encoder_target': torch.FloatTensor(encoder_target),
'decoder_input': torch.LongTensor(decoder_input),
'decoder_target': torch.LongTensor(encoder_input),
'encoder_input': np.array(encoder_input, dtype=np.int64),
'encoder_target': np.array(encoder_target, dtype=np.float64),
'decoder_input': np.array(decoder_input, dtype=np.int64),
'decoder_target': np.array(encoder_input, dtype=np.int64),
'input_len': len_input,
}
else:
sample = {
'encoder_input': torch.LongTensor(encoder_input),
'decoder_target': torch.LongTensor(encoder_input),
'encoder_input': np.array(encoder_input, dtype=np.int64),
'decoder_target': np.array(encoder_input, dtype=np.int64),
'input_len': len_input,
}
if encoder_target is not None:
sample['encoder_target'] = torch.FloatTensor(encoder_target)
sample['encoder_target'] = np.array(encoder_target, dtype=np.float64)
return sample

def __len__(self):
return len(self.inputs)


def convert_arch_to_seq(matrix, ops):
def convert_arch_to_seq(matrix, ops, search_space):
seq = []
n = len(matrix)
assert n == len(ops)

for col in range(1, n):
for row in range(col):
seq.append(matrix[row][col]+1)
if ops[col] == CONV1X1:
seq.append(3)
elif ops[col] == CONV3X3:
seq.append(4)
elif ops[col] == MAXPOOL3X3:
seq.append(5)
if ops[col] == OUTPUT:
seq.append(6)
if ops[col] == 'output':
seq.append(len(search_space) + 3)
elif ops[col] != 'input':
seq.append(search_space.index(ops[col]) + 3)

assert len(seq) == (n+2)*(n-1)/2
return seq


def convert_seq_to_arch(seq):
def convert_seq_to_arch(seq, search_space):
n = int(math.floor(math.sqrt((len(seq) + 1) * 2)))
matrix = [[0 for _ in range(n)] for _ in range(n)]
ops = [INPUT]
ops = ['input']

for i in range(n-1):
offset=(i+3)*i//2
for j in range(i+1):
matrix[j][i+1] = seq[offset+j] - 1
if seq[offset+i+1] == 3:
op = CONV1X1
elif seq[offset+i+1] == 4:
op = CONV3X3
elif seq[offset+i+1] == 5:
op = MAXPOOL3X3
elif seq[offset+i+1] == 6:
op = OUTPUT
idx = seq[offset+i+1] - 3
if idx == len(search_space):
op = 'output'
else:
op = search_space[idx]
ops.append(op)

return matrix, ops

