Commit e3f4751f authored by Ming Ding

del some unused things

parent b32472a9
Showing 7 additions and 827 deletions
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sample Generate GPT2"""
import os
import stat
import random
import numpy as np
import torch
import torch.nn.functional as F
import argparse
import time
from datetime import datetime
from arguments import get_args
from utils import Timers
from pretrain_gpt2 import initialize_distributed
from pretrain_gpt2 import set_random_seed
from utils import load_checkpoint, get_checkpoint_iteration
from data_utils import get_tokenizer
import mpu
import deepspeed
from model import GPT2Model
from utils import print_rank_0
from pretrain_gpt2 import get_model
import math
from copy import deepcopy
from tqdm import tqdm
from generation import get_batch, filling_sequence, add_interlacing_beam_marks, magnify, inverse_prompt_score, filling_sequence_local, filling_sequence_cuda_2d
from torchvision.utils import save_image
import torch.distributed as dist
def setup_model(args):
    """Set up the model and load a checkpoint."""
    model = get_model(args)

    if args.load is not None:
        if args.deepspeed:
            iteration, release, success = get_checkpoint_iteration(args)
            path = os.path.join(args.load, str(iteration), "mp_rank_00_model_states.pt")
            print('current device:', torch.cuda.current_device())
            checkpoint = torch.load(path, map_location=torch.device('cpu'))
            model.load_state_dict(checkpoint["module"])
            print(f"Load model file {path}")
        else:
            _ = load_checkpoint(
                model, None, None, args, load_optimizer_states=False)

    return model
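# Checkpoint layout used above: with --deepspeed, weights are read from
# <load>/<iteration>/mp_rank_00_model_states.pt, and the model state dict is
# stored under the "module" key of that file.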
def _parse_and_to_tensor(text, img_size=256, query_template='{}'):
    tokenizer = get_tokenizer()
    text = query_template.format(*text.split('\t'))
    seq = tokenizer.parse_query(text, img_size=img_size)
    seq = torch.cuda.LongTensor(seq)
    return seq
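# Input format note (inferred from the format(*text.split('\t')) call above):
# query_template fills its '{}' slots from the tab-separated fields of `text`,
# so one-slot templates (text2image) take a bare caption, while two-slot
# templates (e.g. super-resolution) presumably expect 'caption<TAB>image_path'.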
def get_context(args, query_template='{}'):
    tokenizer = get_tokenizer()
    terminate_runs = 0
    img_size = 256 if args.generation_task != 'low-level super-resolution' else 128
    ml = max(args.max_position_embeddings, args.max_position_embeddings_finetune)
    output_path = args.output_path

    if args.input_source == 'interactive':
        assert not args.with_id, '--with-id is only used with file inputs.'
        if args.generation_task == 'post-selection':
            raise ValueError('post-selection only takes file inputs!')
        while True:
            raw_text = input("\nPlease input a query ('stop' to exit) >>> ")
            if not raw_text:
                print('Query should not be empty!')
                continue
            if raw_text == "stop":
                return
            try:
                seq = _parse_and_to_tensor(raw_text, img_size=img_size, query_template=query_template)
            except (ValueError, FileNotFoundError) as e:
                print(e)
                continue
            if len(seq) > ml:
                print("\nSeq length", len(seq),
                      f"\nPlease give a context shorter than {ml}!")
                continue
            yield (raw_text, seq, output_path)
    else:
        with open(args.input_source, 'r') as fin:
            inputs = fin.readlines()
        for line_no, raw_text in enumerate(inputs):
            # Round-robin sharding of input lines across distributed ranks.
            if line_no % dist.get_world_size() != dist.get_rank():
                continue
            rk = dist.get_rank()
            print(f'Working on No. {line_no} on rank {rk}... ')
            raw_text = raw_text.strip()
            if len(raw_text) == 0:
                continue
            if args.with_id:  # the first tab-separated field is an output id
                parts = raw_text.split('\t')
                output_path = os.path.join(args.output_path, parts[0])
                raw_text = '\t'.join(parts[1:])
            if args.generation_task == 'post-selection':
                # parts[0] is the text; parts[1:] are candidate images.
                parts = raw_text.split('\t')
                seqs = []
                for part in parts[1:]:
                    try:
                        seq_single = _parse_and_to_tensor('\t'.join([part, parts[0]]), img_size=img_size, query_template=query_template)
                        seqs.append(seq_single)
                    except (ValueError, FileNotFoundError) as e:
                        print(e)
                        continue
                seq = torch.stack(seqs)
            else:
                try:
                    seq = _parse_and_to_tensor(raw_text, img_size=img_size, query_template=query_template)
                except (ValueError, FileNotFoundError) as e:
                    print(e)
                    continue
                if len(seq) > ml:
                    print("\nSeq length", len(seq),
                          f"\nPlease give a context shorter than {ml}!")
                    continue
            yield (raw_text, seq, output_path)
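# Both branches yield (raw_text, seq, output_path) triples; for post-selection,
# seq is a stacked batch with one row per candidate image.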
def generate_images_once(model, args, raw_text, seq=None, num=8, query_template='{}', output_path='./samples'):
    tokenizer = get_tokenizer()
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    if seq is None:  # need to parse the raw text first
        img_size = 256 if args.generation_task != 'low-level super-resolution' else 128
        seq = _parse_and_to_tensor(raw_text, img_size=img_size, query_template=query_template)
    model.eval()
    with torch.no_grad():
        print('show raw text:', raw_text)
        start_time = time.time()
        if args.generation_task in ['text2image', 'low-level super-resolution']:
            invalid_slices = [slice(tokenizer.img_tokenizer.num_tokens, None)]
        elif args.generation_task == 'image2text':
            invalid_slices = [slice(0, tokenizer.img_tokenizer.num_tokens)]
        else:
            raise NotImplementedError

        mbz = args.max_inference_batch_size
        add_interlacing_beam_marks(seq, nb=min(num, mbz))
        assert num < mbz or num % mbz == 0
        output_tokens_list = []
        # Generate in chunks of at most max_inference_batch_size sequences.
        for tim in range(max(num // mbz, 1)):
            # import line_profiler
            # from mpu.sparse_transformer import standard_attention
            # profile = line_profiler.LineProfiler(model.module.forward)
            # profile = line_profiler.LineProfiler(standard_attention)
            # profile.enable()
            fill_fn = filling_sequence_cuda_2d if args.generation_task == 'cuda-2d generation' else filling_sequence
            output_tokens_list.append(fill_fn(model, seq.clone(), args))
            # torch.cuda.empty_cache()
            # profile.disable()  # stop profiling
            # import sys
            # profile.print_stats(sys.stdout)
        output_tokens_list = torch.cat(output_tokens_list, dim=0)
        print("\nTaken time {:.2f}\n".format(time.time() - start_time), flush=True)
        print("\nContext:", raw_text, flush=True)
        imgs, txts = [], []
        for seq in output_tokens_list:
            decoded_txts, decoded_imgs = tokenizer.DecodeIds(seq.tolist())
            for i in range(len(decoded_imgs)):
                if decoded_imgs[i].shape[-1] < 512:
                    decoded_imgs[i] = torch.nn.functional.interpolate(decoded_imgs[i], size=(512, 512))
                # decoded_imgs[i].view(3, 32, 16, 32, 16)[:, :, :4, :, :4] = 0
                # decoded_imgs[i].view(3, 32, 16, 32, 16)[0, :, :4, :, :4] = 1
                # decoded_imgs[i].view(3, 32, 16, 32, 16)[1, :12, :4, :16, :4] = 1
            if args.debug:
                imgs.extend(decoded_imgs)
            else:
                imgs.append(decoded_imgs[-1])  # only the last image (target)
            txts.append(decoded_txts)
        if args.generation_task == 'image2text':
            print(txts)
            return
        if args.debug:
            output_file_prefix = raw_text.replace('/', '')[:20]
            output_file = os.path.join(output_path, f"{output_file_prefix}-{datetime.now().strftime('%m-%d-%H-%M-%S')}.jpg")
            imgs = torch.cat(imgs, dim=0)
            print(txts)
            print("\nSave to: ", output_file, flush=True)
            save_image(imgs, output_file, normalize=True)
        else:
            print("\nSave to: ", output_path, flush=True)
            for i in range(len(imgs)):
                save_image(imgs[i], os.path.join(output_path, f'{i}.jpg'), normalize=True)
                os.chmod(os.path.join(output_path, f'{i}.jpg'), stat.S_IRWXO + stat.S_IRWXG + stat.S_IRWXU)
            save_image(torch.cat(imgs, dim=0), os.path.join(output_path, 'concat.jpg'), normalize=True)
            os.chmod(os.path.join(output_path, 'concat.jpg'), stat.S_IRWXO + stat.S_IRWXG + stat.S_IRWXU)
def generate_images_continually(model, args):
    if args.generation_task == 'text2image':
        query_template = '[ROI1] {} [BASE] [BOI1] [MASK]*1024'
    elif args.generation_task == 'image2text':
        query_template = '[BASE] [BOI1] [Image]{} [EOI1] [ROI1] [MASK]*20'
    elif args.generation_task == 'low-level super-resolution':
        query_template = '[ROI1] {} [BASE] [BOI1] [Image]{} [EOI1] [ROI2] [POS0] [BASE] [BOI2] [MASK]*1024'
    elif args.generation_task == 'super-resolution':
        query_template = '[ROI1] {} [BASE] [BOI1] [Image]{}'
    elif args.generation_task == 'post-selection':
        query_template = '[BASE] [BOI1] [Image]{} [EOI1] [ROI1] {}'
    elif args.generation_task == 'cuda-2d generation':
        query_template = '[ROI1] {} [BASE] [BOI1] [MASK]*1024 [EOI1] [MASK]*4096'
    else:
        raise NotImplementedError

    for raw_text, seq, output_path in get_context(args, query_template):
        if args.generation_task == 'super-resolution':
            super_resolution(model, args, raw_text, seq, output_path=output_path)
        elif args.generation_task == 'post-selection':
            post_selection(model, args, raw_text, seq, output_path=output_path)
        else:
            generate_images_once(model, args, raw_text, seq, num=args.batch_size, output_path=output_path)
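# Illustrative expansion (assumed behavior of tokenizer.parse_query, which is not
# shown in this file): for text2image, the query 'a tiger' becomes
# '[ROI1] a tiger [BASE] [BOI1] [MASK]*1024', i.e. the text tokens followed by
# 1024 masked image-token positions for filling_sequence to fill in.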
def super_resolution(model, args, raw_text, seq, output_path="./samples"):
    tokenizer = get_tokenizer()
    model.eval()
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    with torch.no_grad():
        start_time = time.time()
        output_tokens_list = magnify(model, tokenizer, seq[-32**2:], seq[:-32**2], args)
        print("\nTaken time {:.2f}\n".format(time.time() - start_time), flush=True)
        print("\nContext:", raw_text, flush=True)
        output_file_prefix = raw_text.replace('/', '')[:20]
        output_file = os.path.join(output_path, f"{output_file_prefix}-{datetime.now().strftime('%m-%d-%H-%M-%S')}.jpg")
        imgs = []
        if args.debug:
            imgs.append(torch.nn.functional.interpolate(tokenizer.img_tokenizer.DecodeIds(seq[-32**2:]), size=(512, 512)))
        for seq in output_tokens_list:
            decoded_txts, decoded_imgs = tokenizer.DecodeIds(seq.tolist())
            imgs.extend(decoded_imgs)
        imgs = torch.cat(imgs, dim=0)
        print("\nSave to: ", output_file, flush=True)
        save_image(imgs, output_file, normalize=True)
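# Here seq[-32**2:] is taken as the 32x32 grid of low-resolution image tokens,
# and the remaining prefix as the text tokens passed to magnify.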
def post_selection(model, args, raw_text, seq, output_path):
    tokenizer = get_tokenizer()
    model.eval()
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    with torch.no_grad():
        start_time = time.time()
        num = seq.shape[0]
        mbz = args.max_inference_batch_size
        assert num < mbz or num % mbz == 0
        scores = [inverse_prompt_score(model, seq[tim * mbz:(tim + 1) * mbz], args)
                  for tim in range(max(num // mbz, 1))]
        scores = torch.cat(scores, dim=0)
        # scores = inverse_prompt_score(model, seq, args)  # once
        print("\nTaken time {:.2f}\n".format(time.time() - start_time), flush=True)
        print("\nContext:", raw_text, flush=True)
        rank = dist.get_rank()
        output_file = os.path.join(output_path, f"scores_rank_{rank}.txt")
        with open(output_file, 'a') as fout:
            fout.write(raw_text + '\n')
            fout.write('\t'.join([str(x) for x in scores.tolist()]) + '\n')
        print("\nSave to: ", output_file, flush=True)
def prepare_tokenizer(args):
    tokenizer = get_tokenizer(args)
    num_tokens = tokenizer.num_tokens
    before = num_tokens
    after = before
    multiple = args.make_vocab_size_divisible_by * \
        mpu.get_model_parallel_world_size()
    while (after % multiple) != 0:
        after += 1
    print_rank_0('> padded vocab (size: {}) with {} dummy '
                 'tokens (new size: {})'.format(before, after - before, after))
    args.vocab_size = after
    print("prepare tokenizer done", flush=True)
    return tokenizer
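# Worked example (hypothetical numbers): with make_vocab_size_divisible_by=128,
# a model-parallel world size of 1, and a raw vocab of 58219 tokens, `after` is
# padded up to 58240 (= 455 * 128), i.e. 21 dummy tokens are added.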
def main():
    """Main sampling program."""
    print('Generate Samples')
    # Disable CuDNN.
    torch.backends.cudnn.enabled = False
    # Arguments.
    args = get_args()
    # Pytorch distributed.
    initialize_distributed(args)
    # Set device; args.device is only used in inference.
    if args.device is not None:
        device = int(args.device)
        torch.cuda.set_device(device)
    # Get the tokenizer.
    tokenizer = prepare_tokenizer(args)
    # Model, without optimizer or learning-rate schedule (inference only).
    model = setup_model(args)
    # Random seeds for reproducibility.
    set_random_seed(args.seed)
    generate_images_continually(model, args)


if __name__ == "__main__":
    main()
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 import numpy as np
 import torch
@@ -30,7 +29,7 @@ def magnify(model, tokenizer, tokens_list, text_token_list, args):
     magnified_code = code.new_zeros((s * 2, s * 2), dtype=torch.long) - 1
     windows = [(0, 0, 18), (0, 1, 30), (0, 2, 30), (1, 1, 30), (1, 0, 30), (1, 2, 30), (2, 0, 32), (2, 1, 32), (2, 2, 32)]
-    for i, j, line in tqdm(windows):
+    for i, j, line in windows:
         code_part = code[8 * i: 8 * (i+2), 8 * j: 8 * (j+2)].reshape(-1)
         magnified_code_part = magnified_code[16 * i: 16 * i + line, 16 * j: 16 * (j+2)].reshape(-1)
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 import numpy as np
 import torch
torch
deepspeed
tqdm
lmdb
filelock
sentencepiece
mpi4py
tensorboardX==1.8
\ No newline at end of file
#!/bin/bash
CHECKPOINT_PATH=data/checkpoints/cogview-base
# CHECKPOINT_PATH=data/checkpoints/cogview-compare
NLAYERS=48
NHIDDEN=2560
NATT=40
MAXSEQLEN=5184
MASTER_PORT=$(shuf -n 1 -i 10000-65535)
MPSIZE=1
#SAMPLING ARGS
TEMP=1.
# If TOPK/TOPP are 0, sampling defaults to greedy; top-k also overrides top-p.
TOPK=200
TOPP=0
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
MASTER_PORT=${MASTER_PORT} python generate_samples.py \
--deepspeed \
--model-parallel-size $MPSIZE \
--num-layers $NLAYERS \
--hidden-size $NHIDDEN \
--load $CHECKPOINT_PATH \
--num-attention-heads $NATT \
--max-position-embeddings 1089 \
--fp16 \
--temperature $TEMP \
--top_k $TOPK \
--top_p $TOPP \
--sandwich-ln \
--img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
--sparse-type cuda_2d \
--max-position-embeddings-finetune $MAXSEQLEN \
--generation-task "cuda-2d generation" \
--input-source ./input.txt \
--output-path samples_cuda_2d3 \
--batch-size 4 \
--max-inference-batch-size 4 \
--device 0 \
--finetune \
--no-load-optim \
--debug \
$@
#!/bin/bash
CHECKPOINT_PATH=pretrained/cogview/cogview-caption
NLAYERS=48
NHIDDEN=2560
NATT=40
MAXSEQLEN=1089
MASTER_PORT=$(shuf -n 1 -i 10000-65535)
MPSIZE=1
#SAMPLING ARGS
TEMP=1.
# If TOPK/TOPP are 0, sampling defaults to greedy; top-k also overrides top-p.
TOPK=200
TOPP=0
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
MASTER_PORT=${MASTER_PORT} python generate_samples.py \
--deepspeed \
--model-parallel-size $MPSIZE \
--num-layers $NLAYERS \
--hidden-size $NHIDDEN \
--load $CHECKPOINT_PATH \
--num-attention-heads $NATT \
--max-position-embeddings 1089 \
--fp16 \
--temperature $TEMP \
--top_k $TOPK \
--top_p $TOPP \
--img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
--query-window 64 \
--key-window-times 4 \
--num-pivot 256 \
--is-sparse 0 \
--max-position-embeddings-finetune $MAXSEQLEN \
--generation-task image2text \
--input-source interactive \
--output-path samples_image2text \
--batch-size 8 \
--debug \
--device 1 \
$@
#!/bin/bash
CHECKPOINT_PATH=pretrained/cogview/cogview-sr
NLAYERS=48
NHIDDEN=2560
NATT=40
MAXSEQLEN=1345
MASTER_PORT=$(shuf -n 1 -i 10000-65535)
MPSIZE=1
#SAMPLING ARGS
TEMP=1.02
# If TOPK/TOPP are 0, sampling defaults to greedy; top-k also overrides top-p.
TOPK=200
TOPP=0
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
MASTER_PORT=${MASTER_PORT} python generate_samples.py \
--deepspeed \
--model-parallel-size $MPSIZE \
--num-layers $NLAYERS \
--hidden-size $NHIDDEN \
--load $CHECKPOINT_PATH \
--num-attention-heads $NATT \
--max-position-embeddings 1089 \
--fp16 \
--temperature $TEMP \
--top_k $TOPK \
--top_p $TOPP \
--img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
--query-window 64 \
--key-window-times 4 \
--num-pivot 256 \
--is-sparse 0 \
--max-position-embeddings-finetune $MAXSEQLEN \
--generation-task "low-level super-resolution" \
--input-source interactive \
--output-path samples_low_level_sr \
--batch-size 4 \
--device 6 \
$@
#!/bin/bash
CHECKPOINT_PATH=pretrained/cogview/cogview-caption
NLAYERS=48
NHIDDEN=2560
NATT=40
MAXSEQLEN=1089
MASTER_PORT=$(shuf -n 1 -i 10000-65535)
MPSIZE=1
#SAMPLING ARGS
TEMP=1.
# If TOPK/TOPP are 0, sampling defaults to greedy; top-k also overrides top-p.
TOPK=200
TOPP=0
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
MASTER_PORT=${MASTER_PORT} python generate_samples.py \
--deepspeed \
--model-parallel-size $MPSIZE \
--num-layers $NLAYERS \
--hidden-size $NHIDDEN \
--load $CHECKPOINT_PATH \
--num-attention-heads $NATT \
--max-position-embeddings 1089 \
--fp16 \
--temperature $TEMP \
--top_k $TOPK \
--top_p $TOPP \
--img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
--query-window 64 \
--key-window-times 4 \
--num-pivot 256 \
--is-sparse 0 \
--max-position-embeddings-finetune $MAXSEQLEN \
--generation-task post-selection \
--input-source input_select.txt \
--output-path samples_post_selection \
--debug \
--device 2 \
$@
# Fields in input-source are separated by \t (tab), not by 4 spaces.
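# A line of input_select.txt therefore looks like (hypothetical paths):
# a tiger sitting on grass<TAB>cand0.jpg<TAB>cand1.jpg<TAB>cand2.jpg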
#! /bin/bash
# Change for multinode config
NUM_WORKERS=19
NUM_GPUS_PER_WORKER=8
MP_SIZE=1
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
main_dir=$(dirname $script_dir)
# OPTIONS_NCCL="NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_SOCKET_IFNAME=bond0 NCCL_IB_GID_INDEX=3 NCCL_NET_GDR_LEVEL=0"
OPTIONS_NCCL="NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_NET_GDR_LEVEL=2"
HOST_FILE_PATH="hostfile"
# OPTIONS_NCCL=""
# HOST_FILE_PATH="hostfile_single"
small_data="/dataset/fd5061f6/cogview/cogdata_new/cogdata_task_4leveltokens/zijian/zijian.bin.part_0.cogdata"
full_data="/dataset/fd5061f6/cogview/cogdata_new/cogdata_task_4leveltokens/merge.bin"
config_json="$script_dir/ds_config_zero.json"
gpt_options=" \
--experiment-name cogview-base-long \
--img-tokenizer-num-tokens 8192 \
--dataset-type CompactBinaryDataset \
--model-parallel-size ${MP_SIZE} \
--num-layers 48 \
--hidden-size 2560 \
--num-attention-heads 40 \
--train-iters 300000 \
--resume-dataloader \
--train-data ${full_data} \
--split 949,50,1 \
--distributed-backend nccl \
--lr-decay-style cosine \
--warmup .1 \
--checkpoint-activations \
--deepspeed-activation-checkpointing \
--max-position-embeddings 1089 \
--max-memory-length 0 \
--sandwich-ln \
--txt-loss-scale 0.1 \
--sparse-type cuda_2d \
--fp16 \
--save-interval 2000 \
--no-load-optim \
--no-save-optim \
--eval-interval 1000 \
--save $main_dir/data/checkpoints \
--fast-load \
--load data/checkpoints/cogview-base \
--finetune
"
# --finetune
# --save $main_dir/data/checkpoints \
# --restart-iter 199000
gpt_options="${gpt_options}
--deepspeed \
--deepspeed_config ${config_json} \
"
run_cmd="${OPTIONS_NCCL} deepspeed --num_nodes ${NUM_WORKERS} --num_gpus ${NUM_GPUS_PER_WORKER} --hostfile ${HOST_FILE_PATH} pretrain_gpt2.py $@ ${gpt_options}"
echo ${run_cmd}
eval ${run_cmd}
set +x
#! /bin/bash
# Change for multinode config
NUM_WORKERS=1
NUM_GPUS_PER_WORKER=8
MP_SIZE=1
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
main_dir=$(dirname $script_dir)
# OPTIONS_NCCL="NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_SOCKET_IFNAME=bond0 NCCL_IB_GID_INDEX=3 NCCL_NET_GDR_LEVEL=0"
OPTIONS_NCCL="NCCL_DEBUG=info"
HOST_FILE_PATH="hostfile_single"
config_json="$script_dir/ds_config_zero.json"
gpt_options=" \
--experiment-name cogview-testlocal \
--img-tokenizer-num-tokens 8192 \
--dataset-type BinaryDataset \
--model-parallel-size ${MP_SIZE} \
--num-layers 48 \
--hidden-size 2560 \
--num-attention-heads 40 \
--save $main_dir/data/checkpoints \
--train-iters 100000 \
--resume-dataloader \
--train-data /dataset/fd5061f6/cogview/cogdata_new/cogdata_task_3leveltokens/merge.bin \
--split 949,50,1 \
--distributed-backend nccl \
--lr-decay-style cosine \
--warmup .1 \
--checkpoint-activations \
--deepspeed-activation-checkpointing \
--max-position-embeddings 5184 \
--max-memory-length 0 \
--fp16 \
--txt-loss-scale 2 \
--sandwich-ln \
--sparse-type cuda_2d \
--save-interval 2500
"
gpt_options="${gpt_options}
--deepspeed \
--deepspeed_config ${config_json} \
"
run_cmd="${OPTIONS_NCCL} deepspeed --num_nodes ${NUM_WORKERS} --num_gpus ${NUM_GPUS_PER_WORKER} --hostfile ${HOST_FILE_PATH} pretrain_gpt2.py $@ ${gpt_options}"
echo ${run_cmd}
eval ${run_cmd}
set +x
#!/bin/bash
CHECKPOINT_PATH=pretrained/cogview/cogview-sr
NLAYERS=48
NHIDDEN=2560
NATT=40
MAXSEQLEN=1345
MASTER_PORT=$(shuf -n 1 -i 10000-65535)
MPSIZE=1
#SAMPLING ARGS
TEMP=1.02
# If TOPK/TOPP are 0, sampling defaults to greedy; top-k also overrides top-p.
TOPK=200
TOPP=0
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
MASTER_PORT=${MASTER_PORT} python generate_samples.py \
--deepspeed \
--model-parallel-size $MPSIZE \
--num-layers $NLAYERS \
--hidden-size $NHIDDEN \
--load $CHECKPOINT_PATH \
--num-attention-heads $NATT \
--max-position-embeddings 1089 \
--fp16 \
--temperature $TEMP \
--top_k $TOPK \
--top_p $TOPP \
--img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
--query-window 64 \
--key-window-times 4 \
--num-pivot 256 \
--is-sparse 0 \
--max-position-embeddings-finetune $MAXSEQLEN \
--generation-task "super-resolution" \
--input-source interactive \
--output-path samples_sr \
--debug \
--device 0 \
$@
#! /bin/bash
# Change for multinode config
NUM_WORKERS=1
NUM_GPUS_PER_WORKER=1
MP_SIZE=1
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
main_dir=$(dirname $script_dir)
# OPTIONS_NCCL="NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_SOCKET_IFNAME=bond0 NCCL_IB_GID_INDEX=3 NCCL_NET_GDR_LEVEL=0"
OPTIONS_NCCL="NCCL_DEBUG=info"
HOST_FILE_PATH="hostfile_single"
small_data="/dataset/fd5061f6/cogview/cogdata_new/cogdata_task_3leveltokens/zijian/zijian.bin.part_0.cogdata"
full_data="/dataset/fd5061f6/cogview/cogdata_new/cogdata_task_3leveltokens/merge.bin"
config_json="$script_dir/ds_config.json"
gpt_options=" \
--experiment-name cogview-testlocal \
--img-tokenizer-num-tokens 8192 \
--dataset-type CompactBinaryDataset \
--model-parallel-size ${MP_SIZE} \
--num-layers 48 \
--hidden-size 2560 \
--num-attention-heads 40 \
--save $main_dir/data/checkpoints \
--train-iters 100000 \
--resume-dataloader \
--test-data ${full_data} \
--split 949,50,1 \
--distributed-backend nccl \
--lr-decay-style cosine \
--warmup .1 \
--checkpoint-activations \
--deepspeed-activation-checkpointing \
--max-position-embeddings 1089 \
--max-memory-length 0 \
--txt-loss-scale 1 \
--sandwich-ln \
--sparse-type standard \
--save-interval 2500 \
--fp16 \
--eval-iters 1000 \
--load pretrained/cogview/cogview-base
"
#
# --load data/checkpoints/cogview-fixgrad-small08-25-09-38
gpt_options="${gpt_options}
"
# --deepspeed \
# --deepspeed_config ${config_json} \
run_cmd="${OPTIONS_NCCL} deepspeed --num_nodes ${NUM_WORKERS} --num_gpus ${NUM_GPUS_PER_WORKER} --hostfile ${HOST_FILE_PATH} pretrain_gpt2.py $@ ${gpt_options}"
echo ${run_cmd}
eval ${run_cmd}
set +x
#!/bin/bash
# ==== tutorial settings: =====
# CHECKPOINT_PATH=data/checkpoints/cogview-bird_animal_tutorial-12-1024-1608-10-09-38
# NLAYERS=12
# NHIDDEN=1024
# NATT=16
# CHECKPOINT_PATH=data/checkpoints/cogview-base
CHECKPOINT_PATH=pretrained/cogview/cogview-base
NLAYERS=48
NHIDDEN=2560
NATT=40
MAXSEQLEN=1089
MASTER_PORT=$(shuf -n 1 -i 10000-65535)
MPSIZE=1
#SAMPLING ARGS
TEMP=1
# If TOPK/TOPP are 0, sampling defaults to greedy; top-k also overrides top-p.
TOPK=200
TOPP=0
script_path=$(realpath $0)
script_dir=$(dirname $script_path)
MASTER_PORT=${MASTER_PORT} python generate_samples.py \
--deepspeed \
--model-parallel-size $MPSIZE \
--num-layers $NLAYERS \
--hidden-size $NHIDDEN \
--load $CHECKPOINT_PATH \
--num-attention-heads $NATT \
--max-position-embeddings 1089 \
--fp16 \
--temperature $TEMP \
--top_k $TOPK \
--top_p $TOPP \
--sandwich-ln \
--img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
--sparse-type standard \
--max-position-embeddings-finetune $MAXSEQLEN \
--generation-task text2image \
--input-source ./input.txt \
--output-path samples_text2image \
--batch-size 8 \
--max-inference-batch-size 8 \
--device 0 \
--debug \
$@
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 import numpy as np
 import torch
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 import numpy as np
 import torch
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 import numpy as np
 import torch
@@ -29,11 +29,11 @@ import deepspeed
 from .learning_rates import AnnealingLR
 from .model_io import load_checkpoint, save_checkpoint
-from utils import Timers
-from utils import report_memory
-from utils import print_args
-from utils import print_rank_0
-from utils import get_sample_writer
+from .utils import Timers
+from .utils import report_memory
+from .utils import print_args
+from .utils import print_rank_0
+from .utils import get_sample_writer
 import mpu
 from data_utils import make_loaders
@@ -16,7 +16,7 @@ import torch
 import numpy as np
 import mpu
-from utils import print_rank_0
+from .utils import print_rank_0

 def get_checkpoint_name(checkpoints_path, iteration, release=False, zero=False):
     if release:
File moved