From e3f4751f99adbb50f447ce29ee7dd2ac9a52fa2b Mon Sep 17 00:00:00 2001
From: Ming Ding <dm_thu@qq.com>
Date: Sun, 10 Oct 2021 17:26:38 +0000
Subject: [PATCH] del some unused things

---
 generate_samples.py                       | 337 ----------------------
 generation/magnify.py                     |   3 +-
 generation/sampling.py                    |   1 -
 requirements.txt                          |   3 -
 scripts_old/cuda_2d_text2image.sh         |  49 ----
 scripts_old/image2text.sh                 |  46 ---
 scripts_old/low_level_super_resolution.sh |  45 ---
 scripts_old/post_selection.sh             |  45 ---
 scripts_old/pretrain_multiple_nodes.sh    |  74 -----
 scripts_old/pretrain_single_node.sh       |  56 ----
 scripts_old/super_resolution.sh           |  45 ---
 scripts_old/testnan.sh                    |  63 ----
 scripts_old/text2image.sh                 |  52 ----
 tokenization/cogview/templates.py         |   1 -
 tokenization/cogview/unified_tokenizer.py |   1 -
 tokenization/cogview/vqvae_tokenizer.py   |   1 -
 training/deepspeed_training.py            |  10 +-
 training/model_io.py                      |   2 +-
 utils.py => training/utils.py             |   0
 19 files changed, 7 insertions(+), 827 deletions(-)
 delete mode 100755 generate_samples.py
 delete mode 100755 scripts_old/cuda_2d_text2image.sh
 delete mode 100755 scripts_old/image2text.sh
 delete mode 100755 scripts_old/low_level_super_resolution.sh
 delete mode 100755 scripts_old/post_selection.sh
 delete mode 100755 scripts_old/pretrain_multiple_nodes.sh
 delete mode 100755 scripts_old/pretrain_single_node.sh
 delete mode 100755 scripts_old/super_resolution.sh
 delete mode 100755 scripts_old/testnan.sh
 delete mode 100755 scripts_old/text2image.sh
 rename utils.py => training/utils.py (100%)

diff --git a/generate_samples.py b/generate_samples.py
deleted file mode 100755
index bbca264..0000000
--- a/generate_samples.py
+++ /dev/null
@@ -1,337 +0,0 @@
-# coding=utf-8
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Sample Generate GPT2"""
-
-import os
-import stat
-import random
-import numpy as np
-import torch
-import torch.nn.functional as F
-import argparse
-import time
-from datetime import datetime
-from arguments import get_args
-from utils import Timers
-from pretrain_gpt2 import initialize_distributed
-from pretrain_gpt2 import set_random_seed
-from utils import load_checkpoint, get_checkpoint_iteration
-from data_utils import get_tokenizer
-import mpu
-import deepspeed
-
-from model import GPT2Model
-from utils import print_rank_0
-from pretrain_gpt2 import get_model
-import math
-from copy import deepcopy
-from tqdm import tqdm
-from generation import get_batch, filling_sequence, add_interlacing_beam_marks, magnify, inverse_prompt_score, filling_sequence_local, filling_sequence_cuda_2d
-from torchvision.utils import save_image
-import torch.distributed as dist
-
-
-def setup_model(args):
-    """Setup model and optimizer."""
-    model = get_model(args)
-
-    if args.load is not None:
-        if args.deepspeed:
-            iteration, release, success = get_checkpoint_iteration(args)
-            path = os.path.join(args.load, str(iteration), "mp_rank_00_model_states.pt")
-            print('current device:', torch.cuda.current_device())
-            checkpoint = torch.load(path, map_location=torch.device('cpu'))
-            model.load_state_dict(checkpoint["module"])
-            print(f"Load model file {path}")
-        else:
-            _ = load_checkpoint(
-                model, None, None, args, load_optimizer_states=False)
-
-    return model
-
-def _parse_and_to_tensor(text, img_size=256, query_template='{}'):
-    tokenizer = get_tokenizer()
-    text = query_template.format(*text.split('\t'))
-    seq = tokenizer.parse_query(text, img_size=img_size)
-    seq = torch.cuda.LongTensor(seq)
-    return seq
-
-def get_context(args, query_template='{}'):
-    tokenizer = get_tokenizer()
-    terminate_runs = 0
-    img_size = 256 if args.generation_task != 'low-level super-resolution' else 128
-    ml = max(args.max_position_embeddings, args.max_position_embeddings_finetune)
-    output_path = args.output_path
-
-    if args.input_source == 'interactive':
-        assert not args.with_id, '--with-id is only used with file inputs.'
-        if args.generation_task == 'post-selection':
-            raise ValueError('post-selection only takes file inputs!')
-        while True:
-            raw_text = input("\nPlease Input Query (stop to exit) >>> ")
-            if not raw_text:
-                print('Query should not be empty!')
-                continue
-            if raw_text == "stop":
-                return
-            try:
-                seq = _parse_and_to_tensor(raw_text, img_size=img_size, query_template=query_template)
-            except (ValueError, FileNotFoundError) as e:
-                print(e)
-                continue
-            if len(seq) > ml:
-                print("\nSeq length", len(seq),
-                      f"\nPlease give smaller context than {ml}!")
-                continue
-            yield (raw_text, seq, output_path)
-    else:
-        with open(args.input_source, 'r') as fin:
-            inputs = fin.readlines()
-        for line_no, raw_text in enumerate(inputs):
-            if line_no % dist.get_world_size() != dist.get_rank():
-                continue
-            rk = dist.get_rank()
-            print(f'Working on No. {line_no} on {rk}...')
-            raw_text = raw_text.strip()
-            if len(raw_text) == 0:
-                continue
-            if args.with_id: # with id
-                parts = raw_text.split('\t')
-                output_path = os.path.join(args.output_path, parts[0])
-                raw_text = '\t'.join(parts[1:])
-
-            if args.generation_task == 'post-selection':
-                parts = raw_text.split('\t')
-                seqs = []
-                for part in parts[1:]:
-                    try:
-                        seq_single = _parse_and_to_tensor('\t'.join([part, parts[0]]), img_size=img_size, query_template=query_template)
-                        seqs.append(seq_single)
-                    except (ValueError, FileNotFoundError) as e:
-                        print(e)
-                        continue
-                seq = torch.stack(seqs)
-            else:
-                try:
-                    seq = _parse_and_to_tensor(raw_text, img_size=img_size, query_template=query_template)
-                except (ValueError, FileNotFoundError) as e:
-                    print(e)
-                    continue
-            if len(seq) > ml:
-                print("\nSeq length", len(seq),
-                      f"\nPlease give smaller context than {ml}!")
-                continue
-            yield (raw_text, seq, output_path)
-
-
-def generate_images_once(model, args, raw_text, seq=None, num=8, query_template='{}', output_path='./samples'):
-    tokenizer = get_tokenizer()
-    if not os.path.exists(output_path):
-        os.makedirs(output_path)
-    if seq is None: # need parse
-        img_size = 256 if args.generation_task != 'low-level super-resolution' else 128
-        seq = _parse_and_to_tensor(raw_text, img_size=img_size, query_template=query_template)
-    model.eval()
-    with torch.no_grad():
-        print('show raw text:', raw_text)
-        start_time = time.time()
-        if args.generation_task in ['text2image', 'low-level super-resolution']:
-            invalid_slices = [slice(tokenizer.img_tokenizer.num_tokens, None)]
-        elif args.generation_task == 'image2text':
-            invalid_slices = [slice(0, tokenizer.img_tokenizer.num_tokens)]
-        else:
-            NotImplementedError
-
-        mbz = args.max_inference_batch_size
-        add_interlacing_beam_marks(seq, nb=min(num, mbz))
-        assert num < mbz or num % mbz == 0
-        output_tokens_list = []
-        for tim in range(max(num // mbz, 1)):
-            # import line_profiler
-            # from mpu.sparse_transformer import standard_attention
-            # profile = line_profiler.LineProfiler(model.module.forward)
-            # profile = line_profiler.LineProfiler(standard_attention)
-            # profile.enable()
-            fill_fn = filling_sequence_cuda_2d if args.generation_task == 'cuda-2d generation' else filling_sequence
-            output_tokens_list.append(fill_fn(model, seq.clone(), args))
-            # torch.cuda.empty_cache()
-            # profile.disable() # stop profiling
-            # import sys
-            # profile.print_stats(sys.stdout)
-
-        output_tokens_list = torch.cat(output_tokens_list, dim=0)
-
-        print("\nTaken time {:.2f}\n".format(time.time() - start_time), flush=True)
-        print("\nContext:", raw_text, flush=True)
-        imgs, txts = [], []
-        for seq in output_tokens_list:
-            decoded_txts, decoded_imgs = tokenizer.DecodeIds(seq.tolist())
-            for i in range(len(decoded_imgs)):
-                if decoded_imgs[i].shape[-1] < 512:
-                    decoded_imgs[i] = torch.nn.functional.interpolate(decoded_imgs[i], size=(512, 512))
-                # decoded_imgs[i].view(3, 32, 16, 32, 16)[:, :, :4, :, :4] = 0
-                # decoded_imgs[i].view(3, 32, 16, 32, 16)[0, :, :4, :, :4] = 1
-                # decoded_imgs[i].view(3, 32, 16, 32, 16)[1, :12, :4, :16, :4] = 1
-            if args.debug:
-                imgs.extend(decoded_imgs)
-            else:
-                imgs.append(decoded_imgs[-1]) # only the last image (target)
-            txts.append(decoded_txts)
-        if args.generation_task == 'image2text':
-            print(txts)
-            return
-        if args.debug:
-            output_file_prefix = raw_text.replace('/', '')[:20]
-            output_file = os.path.join(output_path, f"{output_file_prefix}-{datetime.now().strftime('%m-%d-%H-%M-%S')}.jpg")
-            imgs = torch.cat(imgs, dim=0)
-            print(txts)
-            print("\nSave to: ", output_file, flush=True)
-            save_image(imgs, output_file, normalize=True)
-        else:
-            print("\nSave to: ", output_path, flush=True)
-            for i in range(len(imgs)):
-                save_image(imgs[i], os.path.join(output_path,f'{i}.jpg'), normalize=True)
-                os.chmod(os.path.join(output_path,f'{i}.jpg'), stat.S_IRWXO+stat.S_IRWXG+stat.S_IRWXU)
-            save_image(torch.cat(imgs, dim=0), os.path.join(output_path,f'concat.jpg'), normalize=True)
-            os.chmod(os.path.join(output_path,f'concat.jpg'), stat.S_IRWXO+stat.S_IRWXG+stat.S_IRWXU)
-
-def generate_images_continually(model, args):
-    if args.generation_task == 'text2image':
-        query_template = '[ROI1] {} [BASE] [BOI1] [MASK]*1024'
-    elif args.generation_task == 'image2text':
-        query_template = '[BASE] [BOI1] [Image]{} [EOI1] [ROI1] [MASK]*20'
-    elif args.generation_task == 'low-level super-resolution':
-        query_template = '[ROI1] {} [BASE] [BOI1] [Image]{} [EOI1] [ROI2] [POS0] [BASE] [BOI2] [MASK]*1024'
-    elif args.generation_task == 'super-resolution':
-        query_template = '[ROI1] {} [BASE] [BOI1] [Image]{}'
-    elif args.generation_task == 'post-selection':
-        query_template = '[BASE] [BOI1] [Image]{} [EOI1] [ROI1] {}'
-    elif args.generation_task == 'cuda-2d generation':
-        query_template = '[ROI1] {} [BASE] [BOI1] [MASK]*1024 [EOI1] [MASK]*4096'
-    else:
-        raise NotImplementedError
-    for raw_text, seq, output_path in get_context(args, query_template):
-        if args.generation_task == 'super-resolution':
-            super_resolution(model, args, raw_text, seq, output_path=output_path)
-        elif args.generation_task == 'post-selection':
-            post_selection(model, args, raw_text, seq, output_path=output_path)
-        else:
-            generate_images_once(model, args, raw_text, seq, num=args.batch_size, output_path=output_path)
-
-def super_resolution(model, args, raw_text, seq, output_path="./samples"):
-    tokenizer = get_tokenizer()
-    model.eval()
-    if not os.path.exists(output_path):
-        os.makedirs(output_path)
-    with torch.no_grad():
-        start_time = time.time()
-        output_tokens_list = magnify(model, tokenizer, seq[-32**2:], seq[:-32**2], args)
-
-        print("\nTaken time {:.2f}\n".format(time.time() - start_time), flush=True)
-        print("\nContext:", raw_text, flush=True)
-        output_file_prefix = raw_text.replace('/', '')[:20]
-        output_file = os.path.join(output_path, f"{output_file_prefix}-{datetime.now().strftime('%m-%d-%H-%M-%S')}.jpg")
-        imgs = []
-        if args.debug:
-            imgs.append(torch.nn.functional.interpolate(tokenizer.img_tokenizer.DecodeIds(seq[-32**2:]), size=(512, 512)))
-        for seq in output_tokens_list:
-            decoded_txts, decoded_imgs = tokenizer.DecodeIds(seq.tolist())
-            imgs.extend(decoded_imgs)
-        imgs = torch.cat(imgs, dim=0)
-        print("\nSave to: ", output_file, flush=True)
-        save_image(imgs, output_file, normalize=True)
-
-def post_selection(model, args, raw_text, seq, output_path):
-    tokenizer = get_tokenizer()
-    model.eval()
-    if not os.path.exists(output_path):
-        os.makedirs(output_path)
-    with torch.no_grad():
-        start_time = time.time()
-
-        num = seq.shape[0]
-        mbz = args.max_inference_batch_size
-        assert num < mbz or num % mbz == 0
-        scores = [inverse_prompt_score(model, seq[tim*mbz:(tim+1)*mbz], args)
-            for tim in range(max(num // mbz, 1))
-            ]
-        scores = torch.cat(scores, dim=0)
-        # scores = inverse_prompt_score(model, seq, args) # once
-
-        print("\nTaken time {:.2f}\n".format(time.time() - start_time), flush=True)
-        print("\nContext:", raw_text, flush=True)
-        rank = dist.get_rank()
-        output_file = os.path.join(output_path, f"scores_rank_{rank}.txt")
-        with open(output_file, 'a') as fout:
-            fout.write(raw_text+'\n')
-            fout.write('\t'.join([str(x) for x in scores.tolist()])+'\n')
-        print("\nSave to: ", output_file, flush=True)
-
-
-
-def prepare_tokenizer(args):
-
-    tokenizer = get_tokenizer(args)
-
-    num_tokens = tokenizer.num_tokens
-    before = num_tokens
-    after = before
-    multiple = args.make_vocab_size_divisible_by * \
-        mpu.get_model_parallel_world_size()
-    while (after % multiple) != 0:
-        after += 1
-    print_rank_0('> padded vocab (size: {}) with {} dummy '
-                 'tokens (new size: {})'.format(
-                     before, after - before, after))
-
-    args.vocab_size = after
-    print("prepare tokenizer done", flush=True)
-
-    return tokenizer
-
-
-def main():
-    """Main training program."""
-
-    print('Generate Samples')
-
-    # Disable CuDNN.
-    torch.backends.cudnn.enabled = False
-
-    # Arguments.
-    args = get_args()
-
-    # Pytorch distributed.
-    initialize_distributed(args)
-
-    # set device, this args.device is only used in inference
-    if args.device is not None:
-        device = int(args.device)
-        torch.cuda.set_device(device)
-
-    # Random seeds for reproducibility.
-
-    # get the tokenizer
-    tokenizer = prepare_tokenizer(args)
-
-    # Model, optimizer, and learning rate.
-    model = setup_model(args)
-    set_random_seed(args.seed)
-
-    generate_images_continually(model, args)
-
-if __name__ == "__main__":
-    main()
diff --git a/generation/magnify.py b/generation/magnify.py
index 71f5647..a9651e2 100755
--- a/generation/magnify.py
+++ b/generation/magnify.py
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 
 import numpy as np
 import torch
@@ -30,7 +29,7 @@ def magnify(model, tokenizer, tokens_list, text_token_list, args):
 
     magnified_code = code.new_zeros((s * 2, s * 2), dtype=torch.long) - 1
     windows = [(0, 0, 18), (0, 1, 30), (0, 2, 30), (1, 1, 30), (1, 0, 30), (1, 2, 30), (2, 0, 32), (2, 1, 32), (2, 2, 32)]
-    for i, j, line in tqdm(windows):
+    for i, j, line in windows:
         code_part = code[8 * i: 8 * (i+2), 8 * j: 8 * (j+2)].reshape(-1)
         magnified_code_part = magnified_code[16 * i: 16 * i + line, 16 * j: 16 * (j+2)].reshape(-1)
 
diff --git a/generation/sampling.py b/generation/sampling.py
index b049ba8..f6b543d 100755
--- a/generation/sampling.py
+++ b/generation/sampling.py
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 
 import numpy as np
 import torch
diff --git a/requirements.txt b/requirements.txt
index 08bffa0..1d322aa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,5 @@
 torch
 deepspeed
-tqdm
 lmdb
-filelock
 sentencepiece
-mpi4py
 tensorboardX==1.8
\ No newline at end of file
diff --git a/scripts_old/cuda_2d_text2image.sh b/scripts_old/cuda_2d_text2image.sh
deleted file mode 100755
index 6a32e31..0000000
--- a/scripts_old/cuda_2d_text2image.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-CHECKPOINT_PATH=data/checkpoints/cogview-base
-# CHECKPOINT_PATH=data/checkpoints/cogview-compare
-NLAYERS=48
-NHIDDEN=2560
-NATT=40
-MAXSEQLEN=5184
-MASTER_PORT=$(shuf -n 1 -i 10000-65535)
-MPSIZE=1
-
-#SAMPLING ARGS
-TEMP=1.
-#If TOPK/TOPP are 0 it defaults to greedy sampling, top-k will also override top-p
-TOPK=200
-TOPP=0
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-
-MASTER_PORT=${MASTER_PORT} python generate_samples.py \
-       --deepspeed \
-       --model-parallel-size $MPSIZE \
-       --num-layers $NLAYERS \
-       --hidden-size $NHIDDEN \
-       --load $CHECKPOINT_PATH \
-       --num-attention-heads $NATT \
-       --max-position-embeddings 1089 \
-       --fp16 \
-       --temperature $TEMP \
-       --top_k $TOPK \
-       --top_p $TOPP \
-       --sandwich-ln \
-       --img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
-       --sparse-type cuda_2d \
-       --max-position-embeddings-finetune $MAXSEQLEN \
-       --generation-task "cuda-2d generation" \
-       --input-source ./input.txt \
-       --output-path samples_cuda_2d3 \
-       --batch-size 4 \
-       --max-inference-batch-size 4 \
-       --device 0 \
-       --finetune \
-       --no-load-optim \
-       --sparse-type cuda_2d \
-       --debug \
-       $@
-
-
diff --git a/scripts_old/image2text.sh b/scripts_old/image2text.sh
deleted file mode 100755
index 9bcfc75..0000000
--- a/scripts_old/image2text.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-CHECKPOINT_PATH=pretrained/cogview/cogview-caption
-NLAYERS=48
-NHIDDEN=2560
-NATT=40
-MAXSEQLEN=1089
-MASTER_PORT=$(shuf -n 1 -i 10000-65535)
-MPSIZE=1
-
-#SAMPLING ARGS
-TEMP=1.
-#If TOPK/TOPP are 0 it defaults to greedy sampling, top-k will also override top-p
-TOPK=200
-TOPP=0
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-
-MASTER_PORT=${MASTER_PORT} python generate_samples.py \
-       --deepspeed \
-       --model-parallel-size $MPSIZE \
-       --num-layers $NLAYERS \
-       --hidden-size $NHIDDEN \
-       --load $CHECKPOINT_PATH \
-       --num-attention-heads $NATT \
-       --max-position-embeddings 1089 \
-       --fp16 \
-       --temperature $TEMP \
-       --top_k $TOPK \
-       --top_p $TOPP \
-       --img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
-       --query-window 64 \
-       --key-window-times 4 \
-       --num-pivot 256 \
-       --is-sparse 0 \
-       --max-position-embeddings-finetune $MAXSEQLEN \
-       --generation-task image2text \
-       --input-source interactive \
-       --output-path samples_image2text \
-       --batch-size 8 \
-       --debug \
-       --device 1 \
-       $@
-
-
diff --git a/scripts_old/low_level_super_resolution.sh b/scripts_old/low_level_super_resolution.sh
deleted file mode 100755
index 1be1f36..0000000
--- a/scripts_old/low_level_super_resolution.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-
-CHECKPOINT_PATH=pretrained/cogview/cogview-sr
-NLAYERS=48
-NHIDDEN=2560
-NATT=40
-MAXSEQLEN=1345
-MASTER_PORT=$(shuf -n 1 -i 10000-65535)
-MPSIZE=1
-
-#SAMPLING ARGS
-TEMP=1.02
-#If TOPK/TOPP are 0 it defaults to greedy sampling, top-k will also override top-p
-TOPK=200
-TOPP=0
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-
-MASTER_PORT=${MASTER_PORT} python generate_samples.py \
-       --deepspeed \
-       --model-parallel-size $MPSIZE \
-       --num-layers $NLAYERS \
-       --hidden-size $NHIDDEN \
-       --load $CHECKPOINT_PATH \
-       --num-attention-heads $NATT \
-       --max-position-embeddings 1089 \
-       --fp16 \
-       --temperature $TEMP \
-       --top_k $TOPK \
-       --top_p $TOPP \
-       --img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
-       --query-window 64 \
-       --key-window-times 4 \
-       --num-pivot 256 \
-       --is-sparse 0 \
-       --max-position-embeddings-finetune $MAXSEQLEN \
-       --generation-task "low-level super-resolution" \
-       --input-source interactive \
-       --output-path samples_low_level_sr \
-       --batch-size 4 \
-       --device 6 \
-       $@
-
-
diff --git a/scripts_old/post_selection.sh b/scripts_old/post_selection.sh
deleted file mode 100755
index 21ef553..0000000
--- a/scripts_old/post_selection.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-
-CHECKPOINT_PATH=pretrained/cogview/cogview-caption
-NLAYERS=48
-NHIDDEN=2560
-NATT=40
-MAXSEQLEN=1089
-MASTER_PORT=$(shuf -n 1 -i 10000-65535)
-MPSIZE=1
-
-#SAMPLING ARGS
-TEMP=1.
-#If TOPK/TOPP are 0 it defaults to greedy sampling, top-k will also override top-p
-TOPK=200
-TOPP=0
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-
-MASTER_PORT=${MASTER_PORT} python generate_samples.py \
-       --deepspeed \
-       --model-parallel-size $MPSIZE \
-       --num-layers $NLAYERS \
-       --hidden-size $NHIDDEN \
-       --load $CHECKPOINT_PATH \
-       --num-attention-heads $NATT \
-       --max-position-embeddings 1089 \
-       --fp16 \
-       --temperature $TEMP \
-       --top_k $TOPK \
-       --top_p $TOPP \
-       --img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
-       --query-window 64 \
-       --key-window-times 4 \
-       --num-pivot 256 \
-       --is-sparse 0 \
-       --max-position-embeddings-finetune $MAXSEQLEN \
-       --generation-task post-selection \
-       --input-source input_select.txt \
-       --output-path samples_post_selection \
-       --debug \
-       --device 2 \
-       $@
-# input-source is split by \t, instead of 4 spaces
-
diff --git a/scripts_old/pretrain_multiple_nodes.sh b/scripts_old/pretrain_multiple_nodes.sh
deleted file mode 100755
index f099f80..0000000
--- a/scripts_old/pretrain_multiple_nodes.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#! /bin/bash
-
-# Change for multinode config
-
-NUM_WORKERS=19
-NUM_GPUS_PER_WORKER=8
-MP_SIZE=1
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-main_dir=$(dirname $script_dir)
-
-# OPTIONS_NCCL="NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_SOCKET_IFNAME=bond0 NCCL_IB_GID_INDEX=3 NCCL_NET_GDR_LEVEL=0"
-OPTIONS_NCCL="NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_NET_GDR_LEVEL=2"
-HOST_FILE_PATH="hostfile"
-# OPTIONS_NCCL=""
-# HOST_FILE_PATH="hostfile_single"
-
-small_data="/dataset/fd5061f6/cogview/cogdata_new/cogdata_task_4leveltokens/zijian/zijian.bin.part_0.cogdata"
-full_data="/dataset/fd5061f6/cogview/cogdata_new/cogdata_task_4leveltokens/merge.bin"
-
-config_json="$script_dir/ds_config_zero.json"
-gpt_options=" \
-       --experiment-name cogview-base-long \
-       --img-tokenizer-num-tokens 8192 \
-       --dataset-type CompactBinaryDataset \
-       --model-parallel-size ${MP_SIZE} \
-       --num-layers 48 \
-       --hidden-size 2560 \
-       --num-attention-heads 40 \
-       --train-iters 300000 \
-       --resume-dataloader \
-       --train-data ${full_data} \
-       --split 949,50,1 \
-       --distributed-backend nccl \
-       --lr-decay-style cosine \
-       --warmup .1 \
-       --checkpoint-activations \
-       --deepspeed-activation-checkpointing \
-       --max-position-embeddings 1089 \
-       --max-memory-length 0 \
-       --sandwich-ln \
-       --txt-loss-scale 0.1 \
-       --sparse-type cuda_2d \
-       --fp16 \
-       --save-interval 2000 \
-       --no-load-optim \
-       --no-save-optim \
-       --eval-interval 1000 \
-       --save $main_dir/data/checkpoints \
-       --fast-load \
-       --load data/checkpoints/cogview-base \
-       --finetune
-"
-
-# --finetune
-    # --save $main_dir/data/checkpoints \
-    # --restart-iter 199000
-
-
-
-
-
-gpt_options="${gpt_options}
-       --deepspeed \
-       --deepspeed_config ${config_json} \
-"
-
-
-run_cmd="${OPTIONS_NCCL} deepspeed --num_nodes ${NUM_WORKERS} --num_gpus ${NUM_GPUS_PER_WORKER} --hostfile ${HOST_FILE_PATH} pretrain_gpt2.py $@ ${gpt_options}"
-echo ${run_cmd}
-eval ${run_cmd}
-
-set +x
diff --git a/scripts_old/pretrain_single_node.sh b/scripts_old/pretrain_single_node.sh
deleted file mode 100755
index 8443636..0000000
--- a/scripts_old/pretrain_single_node.sh
+++ /dev/null
@@ -1,56 +0,0 @@
-#! /bin/bash
-
-# Change for multinode config
-
-NUM_WORKERS=1
-NUM_GPUS_PER_WORKER=8
-MP_SIZE=1
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-main_dir=$(dirname $script_dir)
-
-# OPTIONS_NCCL="NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_SOCKET_IFNAME=bond0 NCCL_IB_GID_INDEX=3 NCCL_NET_GDR_LEVEL=0"
-OPTIONS_NCCL="NCCL_DEBUG=info"
-HOST_FILE_PATH="hostfile_single"
-
-
-config_json="$script_dir/ds_config_zero.json"
-gpt_options=" \
-       --experiment-name cogview-testlocal \
-       --img-tokenizer-num-tokens 8192 \
-       --dataset-type BinaryDataset \
-       --model-parallel-size ${MP_SIZE} \
-       --num-layers 48 \
-       --hidden-size 2560 \
-       --num-attention-heads 40 \
-       --save $main_dir/data/checkpoints \
-       --train-iters 100000 \
-       --resume-dataloader \
-       --train-data /dataset/fd5061f6/cogview/cogdata_new/cogdata_task_3leveltokens/merge.bin \
-       --split 949,50,1 \
-       --distributed-backend nccl \
-       --lr-decay-style cosine \
-       --warmup .1 \
-       --checkpoint-activations \
-       --deepspeed-activation-checkpointing \
-       --max-position-embeddings 5184 \
-       --max-memory-length 0 \
-       --fp16 \
-       --txt-loss-scale 2 \
-       --sandwich-ln \
-       --sparse-type cuda_2d \
-       --save-interval 2500
-"
-
-gpt_options="${gpt_options}
-       --deepspeed \
-       --deepspeed_config ${config_json} \
-"
-
-
-run_cmd="${OPTIONS_NCCL} deepspeed --num_nodes ${NUM_WORKERS} --num_gpus ${NUM_GPUS_PER_WORKER} --hostfile ${HOST_FILE_PATH} pretrain_gpt2.py $@ ${gpt_options}"
-echo ${run_cmd}
-eval ${run_cmd}
-
-set +x
diff --git a/scripts_old/super_resolution.sh b/scripts_old/super_resolution.sh
deleted file mode 100755
index 77cb606..0000000
--- a/scripts_old/super_resolution.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-
-CHECKPOINT_PATH=pretrained/cogview/cogview-sr
-NLAYERS=48
-NHIDDEN=2560
-NATT=40
-MAXSEQLEN=1345
-MASTER_PORT=$(shuf -n 1 -i 10000-65535)
-MPSIZE=1
-
-#SAMPLING ARGS
-TEMP=1.02
-#If TOPK/TOPP are 0 it defaults to greedy sampling, top-k will also override top-p
-TOPK=200
-TOPP=0
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-
-MASTER_PORT=${MASTER_PORT} python generate_samples.py \
-       --deepspeed \
-       --model-parallel-size $MPSIZE \
-       --num-layers $NLAYERS \
-       --hidden-size $NHIDDEN \
-       --load $CHECKPOINT_PATH \
-       --num-attention-heads $NATT \
-       --max-position-embeddings 1089 \
-       --fp16 \
-       --temperature $TEMP \
-       --top_k $TOPK \
-       --top_p $TOPP \
-       --img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
-       --query-window 64 \
-       --key-window-times 4 \
-       --num-pivot 256 \
-       --is-sparse 0 \
-       --max-position-embeddings-finetune $MAXSEQLEN \
-       --generation-task "super-resolution" \
-       --input-source interactive \
-       --output-path samples_sr \
-       --debug \
-       --device 0 \
-       $@
-
-
diff --git a/scripts_old/testnan.sh b/scripts_old/testnan.sh
deleted file mode 100755
index 2095c0c..0000000
--- a/scripts_old/testnan.sh
+++ /dev/null
@@ -1,63 +0,0 @@
-#! /bin/bash
-
-# Change for multinode config
-
-NUM_WORKERS=1
-NUM_GPUS_PER_WORKER=1
-MP_SIZE=1
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-main_dir=$(dirname $script_dir)
-
-# OPTIONS_NCCL="NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_SOCKET_IFNAME=bond0 NCCL_IB_GID_INDEX=3 NCCL_NET_GDR_LEVEL=0"
-OPTIONS_NCCL="NCCL_DEBUG=info"
-HOST_FILE_PATH="hostfile_single"
-
-small_data="/dataset/fd5061f6/cogview/cogdata_new/cogdata_task_3leveltokens/zijian/zijian.bin.part_0.cogdata"
-full_data="/dataset/fd5061f6/cogview/cogdata_new/cogdata_task_3leveltokens/merge.bin"
-
-config_json="$script_dir/ds_config.json"
-gpt_options=" \
-       --experiment-name cogview-testlocal \
-       --img-tokenizer-num-tokens 8192 \
-       --dataset-type CompactBinaryDataset \
-       --model-parallel-size ${MP_SIZE} \
-       --num-layers 48 \
-       --hidden-size 2560 \
-       --num-attention-heads 40 \
-       --save $main_dir/data/checkpoints \
-       --train-iters 100000 \
-       --resume-dataloader \
-       --test-data ${full_data} \
-       --split 949,50,1 \
-       --distributed-backend nccl \
-       --lr-decay-style cosine \
-       --warmup .1 \
-       --checkpoint-activations \
-       --deepspeed-activation-checkpointing \
-       --max-position-embeddings 1089 \
-       --max-memory-length 0 \
-       --txt-loss-scale 1 \
-       --sandwich-ln \
-       --sparse-type standard \
-       --save-interval 2500 \
-       --fp16 \
-       --eval-iters 1000 \
-       --load pretrained/cogview/cogview-base
-"
-    #
-    # --load data/checkpoints/cogview-fixgrad-small08-25-09-38
-
-
-gpt_options="${gpt_options}
-
-"
-    # --deepspeed \
-    # --deepspeed_config ${config_json} \
-
-run_cmd="${OPTIONS_NCCL} deepspeed --num_nodes ${NUM_WORKERS} --num_gpus ${NUM_GPUS_PER_WORKER} --hostfile ${HOST_FILE_PATH} pretrain_gpt2.py $@ ${gpt_options}"
-echo ${run_cmd}
-eval ${run_cmd}
-
-set +x
diff --git a/scripts_old/text2image.sh b/scripts_old/text2image.sh
deleted file mode 100755
index 4f2f5d5..0000000
--- a/scripts_old/text2image.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-# ==== tutorial settings: =====
-# CHECKPOINT_PATH=data/checkpoints/cogview-bird_animal_tutorial-12-1024-1608-10-09-38
-# NLAYERS=12
-# NHIDDEN=1024
-# NATT=16
-
-# CHECKPOINT_PATH=data/checkpoints/cogview-base
-CHECKPOINT_PATH=pretrained/cogview/cogview-base
-NLAYERS=48
-NHIDDEN=2560
-NATT=40
-MAXSEQLEN=1089
-MASTER_PORT=$(shuf -n 1 -i 10000-65535)
-MPSIZE=1
-
-#SAMPLING ARGS
-TEMP=1
-#If TOPK/TOPP are 0 it defaults to greedy sampling, top-k will also override top-p
-TOPK=200
-TOPP=0
-
-script_path=$(realpath $0)
-script_dir=$(dirname $script_path)
-
-MASTER_PORT=${MASTER_PORT} python generate_samples.py \
-       --deepspeed \
-       --model-parallel-size $MPSIZE \
-       --num-layers $NLAYERS \
-       --hidden-size $NHIDDEN \
-       --load $CHECKPOINT_PATH \
-       --num-attention-heads $NATT \
-       --max-position-embeddings 1089 \
-       --fp16 \
-       --temperature $TEMP \
-       --top_k $TOPK \
-       --top_p $TOPP \
-       --sandwich-ln \
-       --img-tokenizer-path pretrained/vqvae/vqvae_hard_biggerset_011.pt \
-       --sparse-type standard \
-       --max-position-embeddings-finetune $MAXSEQLEN \
-       --generation-task text2image \
-       --input-source ./input.txt \
-       --output-path samples_text2image \
-       --batch-size 8 \
-       --max-inference-batch-size 8 \
-       --device 0 \
-       --debug \
-       $@
-
-
diff --git a/tokenization/cogview/templates.py b/tokenization/cogview/templates.py
index 4d66578..3d78d4c 100755
--- a/tokenization/cogview/templates.py
+++ b/tokenization/cogview/templates.py
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 
 import numpy as np
 import torch
diff --git a/tokenization/cogview/unified_tokenizer.py b/tokenization/cogview/unified_tokenizer.py
index d72741d..8a06004 100755
--- a/tokenization/cogview/unified_tokenizer.py
+++ b/tokenization/cogview/unified_tokenizer.py
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 
 import numpy as np
 import torch
diff --git a/tokenization/cogview/vqvae_tokenizer.py b/tokenization/cogview/vqvae_tokenizer.py
index 23df2ef..fc40e87 100755
--- a/tokenization/cogview/vqvae_tokenizer.py
+++ b/tokenization/cogview/vqvae_tokenizer.py
@@ -11,7 +11,6 @@ import os
 import sys
 import math
 import random
-from tqdm import tqdm
 
 import numpy as np
 import torch
diff --git a/training/deepspeed_training.py b/training/deepspeed_training.py
index 80d0e42..90190a6 100644
--- a/training/deepspeed_training.py
+++ b/training/deepspeed_training.py
@@ -29,11 +29,11 @@ import deepspeed
 
 from .learning_rates import AnnealingLR
 from .model_io import load_checkpoint, save_checkpoint
-from utils import Timers
-from utils import report_memory
-from utils import print_args
-from utils import print_rank_0
-from utils import get_sample_writer
+from .utils import Timers
+from .utils import report_memory
+from .utils import print_args
+from .utils import print_rank_0
+from .utils import get_sample_writer
 
 import mpu
 from data_utils import make_loaders
diff --git a/training/model_io.py b/training/model_io.py
index ce434fd..df6b752 100644
--- a/training/model_io.py
+++ b/training/model_io.py
@@ -16,7 +16,7 @@ import torch
 import numpy as np
 
 import mpu
-from utils import print_rank_0
+from .utils import print_rank_0
 
 def get_checkpoint_name(checkpoints_path, iteration, release=False, zero=False):
     if release:
diff --git a/utils.py b/training/utils.py
similarity index 100%
rename from utils.py
rename to training/utils.py
--
GitLab