diff --git a/README.md b/README.md
index d9e9d8354e8e7284789aad635fbc2bfa0e1515b9..09ad7b9a0f0c673965481f204dfd06c607b85423 100644
--- a/README.md
+++ b/README.md
@@ -7,12 +7,12 @@ Llama 2 is a new technology that carries potential risks with use. Testing condu
 
 # Table of Contents
 1. [Quick start](#quick-start)
-2. [Fine-tuning](#fine-tuning)
+2. [Model Conversion](#model-conversion-to-hugging-face)
+3. [Fine-tuning](#fine-tuning)
     - [Single GPU](#single-gpu)
     - [Multi GPU One Node](#multiple-gpus-one-node)
     - [Multi GPU Multi Node](#multi-gpu-multi-node)
-3. [Inference](./docs/inference.md)
-4. [Model Conversion](#model-conversion-to-hugging-face)
+4. [Inference](./docs/inference.md)
 5. [Repository Organization](#repository-organization)
 6. [License and Acceptable Use Policy](#license)
 
@@ -46,6 +46,23 @@ pip install -r requirements.txt
 
 **Please note that the above requirements.txt will install PyTorch 2.0.1 version, in case you want to run FSDP + PEFT, please make sure to install PyTorch nightlies.**
 
+# Model conversion to Hugging Face
+The recipes and notebooks in this folder use the Llama 2 model definition provided by Hugging Face's transformers library.
+
+Given that the original checkpoint resides under models/7B, you can install all requirements and convert the checkpoint with:
+
+```bash
+## Install HuggingFace Transformers from source
+pip freeze | grep transformers ## verify it is version 4.31.0 or higher
+
+## Clone the transformers repo and run the conversion script
+git clone git@github.com:huggingface/transformers.git
+cd transformers
+pip install protobuf
+python src/transformers/models/llama/convert_llama_weights_to_hf.py \
+    --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path
+```
+
 # Fine-tuning
 
 For fine-tuning Llama 2 models for your domain-specific use cases recipes for PEFT, FSDP, PEFT+FSDP have been included along with a few test datasets. For details see [LLM Fine-tuning](./docs/LLM_finetuning.md).
 
@@ -112,20 +129,6 @@ sbatch multi_node.slurm
 
 You can read more about our fine-tuning strategies [here](./docs/LLM_finetuning.md).
 
-# Model conversion to Hugging Face
-The recipes and notebooks in this folder are using the Llama 2 model definition provided by Hugging Face's transformers library.
-
-Given that the original checkpoint resides under models/7B you can install all requirements and convert the checkpoint with:
-
-```bash
-## Install HuggingFace Transformers from source
-pip install git+https://github.com/huggingface/transformers
-cd transformers
-
-python src/transformers/models/llama/convert_llama_weights_to_hf.py \
-    --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir models_hf/7B
-```
-
 # Repository Organization
 This repository is organized in the following way:
 
diff --git a/docs/inference.md b/docs/inference.md
index 144431bb2c6e22a6541bd0bd2ab1e97815137ce0..48a3c8eeedb086c6061bf5c719e9fcfa71a1e69f 100644
--- a/docs/inference.md
+++ b/docs/inference.md
@@ -31,7 +31,7 @@ inference/samsum_prompt.txt
 
 The inference folder also includes a chat completion example, that adds built-in safety features in fine-tuned models to the prompt tokens.
 
 To run the example:
 ```bash
-python chat_completion.py --model_name "PATH/TO/MODEL/7B/" --prompt_file chats.json --quantization --use_auditnlg
+python inference/chat_completion.py --model_name "PATH/TO/MODEL/7B/" --prompt_file inference/chats.json --quantization --use_auditnlg
 ```
diff --git a/scripts/markdown_link_check_config.json b/scripts/markdown_link_check_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c17a7b8739123d141a267bf283907fc076aa2dbe
--- /dev/null
+++ b/scripts/markdown_link_check_config.json
@@ -0,0 +1,24 @@
+{
+    "retryOn429": true,
+    "retryCount": 5,
+    "fallbackRetryDelay": "10s",
+    "httpHeaders": [
+        {
+            "urls": [
+                "https://docs.github.com/",
+                "https://help.github.com/"
+            ],
+            "headers": {
+                "Accept-Encoding": "zstd, br, gzip, deflate"
+            }
+        }
+    ],
+    "ignorePatterns": [
+        {
+            "pattern": "^http(s)?://127.0.0.1.*"
+        },
+        {
+            "pattern": "^http(s)?://localhost.*"
+        }
+    ]
+}
diff --git a/scripts/spellcheck.sh b/scripts/spellcheck.sh
new file mode 100755
index 0000000000000000000000000000000000000000..7f423d5037cafa310a2761e395dc0dba9d270214
--- /dev/null
+++ b/scripts/spellcheck.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Source: https://github.com/pytorch/torchx/blob/main/scripts/spellcheck.sh
+set -ex
+sudo apt-get install -y aspell
+
+if [[ -z "$*" ]]; then
+  sources=$(find . -name '*.md')
+else
+  sources=$@
+fi
+
+sources_arg=""
+for src in $sources; do
+  sources_arg="${sources_arg} -S $src"
+done
+
+if [ ! "$sources_arg" ]; then
+  echo "No files to spellcheck"
+else
+  pyspelling -c scripts/spellcheck_conf/spellcheck.yaml --name Markdown $sources_arg
+fi
diff --git a/scripts/spellcheck_conf/spellcheck.yaml b/scripts/spellcheck_conf/spellcheck.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a9a2df84e3d068badd43752c0fd74164c8150dc
--- /dev/null
+++ b/scripts/spellcheck_conf/spellcheck.yaml
@@ -0,0 +1,22 @@
+matrix:
+- name: Markdown
+  aspell:
+    lang: en
+    d: en_US
+  dictionary:
+    wordlists:
+    - scripts/spellcheck_conf/wordlist.txt
+    output: scripts/spellcheck_conf/wordlist.dic
+    encoding: utf-8
+  pipeline:
+  - pyspelling.filters.context:
+      context_visible_first: true
+      delimiters:
+      - open: '(?s)^ *(?P<open>`{3,})[a-z0-9]*?$'
+        close: '^(?P=open)$'
+      - open: ''
+        content: 'https?://[-a-zA-Z0-9.]+?\.[a-z]{2,6}[-?=&%.0-9a-zA-Z/_#]*'
+        close: ''
+  - pyspelling.filters.markdown:
+      markdown_extensions:
+      - markdown.extensions.extra:
diff --git a/scripts/spellcheck_conf/wordlist.txt b/scripts/spellcheck_conf/wordlist.txt
new file mode 100644
index 0000000000000000000000000000000000000000..77ef92c0954d80f64ea0b34e9a1b11f4afcc6130
--- /dev/null
+++ b/scripts/spellcheck_conf/wordlist.txt
@@ -0,0 +1,1070 @@
+BaseHandler
+ImageNet
+RGB
+TorchServe
+archiver
+dataset
+github
+href
+https
+json
+li
+py
+pytorch
+segmenter
+torchvision
+ul
+usecase
+CUDA
+JDK
+NVIDIA
+WSL
+bashrc
+cd
+githubusercontent
+html
+microsoft
+ol
+openjdk
+OpenJDK
+pre
+psutil
+sentencepiece
+src
+sudo
+torchtext
+ubuntu
+wget
+APIs
+Eg
+MilliSeconds
+URI
+YAML
+dataflow
+func
+lt
+md
+params
+postprocess
+postprocessing
+preprocess
+preprocessing
+serializable
+tbody
+td
+th
+thead
+unregister
+url
+CONFIG
+MNIST
+README
+hotdogs
+ncs
+squeezenet
+vgg
+TorchServe's
+cfg
+configs
+runtime
+yyyyMMddHHmmssSSS
+AWS
+Benchmarking
+Captum
+Grafana
+HuggingFace
+JMeter
+KMS
+Kubeflow
+Kubernetes
+MMF
+contrib
+ddb
+gRPC
+ipynb
+mlflow
+nmt
+performant
+torschripted
+API's
+ASG
+Django
+Dockerfile
+ELB
+LoadBalancer
+OpenAPI
+PyPi
+SDK
+SageMaker
+blockquote
+cli
+cloudformation
+cmd +dev +dir +io +issuecomment +lxning +netty +perf +presigned +tagname +txt +ConfigManager +GPL +NVSMI +Powershell +Redistributable +env +exe +frontend +msi +nodejs +npm +prebuilt +smi +stackoverflow +util +AlexNet +DeepLabV +Densenet +FCN +RCNN +ResNet +Torchscripted +fastrcnn +jpg +maskrcnn +png +KFServing +Seldon +ai +analytics +orchestrator +PMD +backend +checkstyle +cov +gradlew +htmlcov +node.js +pylint +pylintrc +pytest +rcfile +tcort +ut +localhost +myworkflow +wfpredict +Bytearray +CN +CORS +EventLoopGroup +EventLoops +GPUs +JVM +MaxDirectMemorySize +OU +OpenSSL +PCI +PIL +PKCS +PYTHONPATH +Palo +RSA +SSL +WorkerThread +amazonaws +async +batchSize +changeit +dalay +defaultVersion +dep +dname +envvars +genkey +gpu +gz +keyalg +keyout +keysize +keystore +keytool +livebook +marName +maxBatchDelay +maxWorkers +minWorkers +modelName +msec +mycert +mykey +natively +newkey +noop +parameterName +parameterNameN +parameterValue +parameterValueN +pathname +pem +preflight +readthedocs +req +responseTimeout +scalability +storepass +storetype +urls +utf +vmargs +wlm +www +yourdomain +nextPageToken +subfolder +unregistering +workflowDag +workflowName +workflowUrl +Javascript +RESTful +codegen +Args +CustomImageClassifier +DefaultHandlerClass +ImageClassifier +Init +LayerIntegratedGradients +ModelHandler +NDArray +PredictionException +Preprocessed +RuntimeError +Waveglow +cpu +embeddings +fp +ie +isfile +isinstance +jit +kwargs +os +param +pred +pth +pyt +serializedFile +str +tacotron +utils +vCPUs +waveglowpyt +DL +LJO +MiB +cv +dockerd +entrypoint +gpuId +gpuUsage +inferencing +loadedAtStartup +memoryUsage +milli +modelUrl +modelVersion +pid +startTime +Captum's +InferenceAPIsService +ModelServer +br +kf +proto +CPUUtilization +DiskAvailable +DiskUsage +DiskUsed +DiskUtilization +DistanceInKM +HostName +InferenceTime +JSONLayout +LoopCount +MemoryAvailable +MemoryUsed +MemoryUtilization +MetricName +SizeOfImage +StatsD +appender +dimN +etsy +formatter +idx +img +kB +DescribeModel +ListModels +RegisterModel +ScaleWorker +SetDefault +UnregisterModel +gRPCs +grpcio +mkdir +protobuf +protoc +repo +BackendWorker +ConversionPattern +Dlog +MaxBackupIndex +MaxFileSize +PatternLayout +RollingFileAppender +WorkerLifeCycle +apache +nnvm +stderr +stdout +ConflictStatusException +DownloadModelException +InvalidSnapshotException +ModelNotFoundException +NoSuchMethodError +ServiceUnavailableException +lang +mb +ntl +PrometheusServer +globoff +noopversioned +systemctl +uuid +yml +AWSS +AmazonS +IAM +ManagementAPIsService +ReadOnlyAccess +UserGuide +UsingKMSEncryption +acknowledgement +macOS +sse +fairseq +libs +mv +pretrained +publically +ready-made +tmp +torchscript +torchvision's +handerl +Bitte +Bonjour +Hallo +Hause +Ich +Ihnen +Ihren +Je +Namen +Sie +TransformerEn +Und +WMT +Wie +allez +arxiv +auf +bien +chez +danke +dataclasses +dich +du +english +erinnere +et +fb +geht +german +komm +kommst +le +leid +läuft +m'excuser +merci +mich +mir +monde +möglich +nFine +nIt’s +nPlease +nach +ne +nicht +nom +prie +quand +rentrerez +selbst +sich +sind +souviens +tôt +va +venir +votre +vous +wann +warte +Ça +BERTQA +BERTSeqClassification +BERTTokenClassification +MFreidank +RoBERTA +XLM +distilbert +does't +finetuning +num +tc +tokenizer +vidhya +vocabs +AutoConfig +Huggingface's +ScriptFunction +transfomers +BBM +BaseDataset +BaseDatasetBuilder +BaseModel +FNSio +MMFTransformer +MultiModal +OmegaConfing +Pyav +REU +TextCaps +TextVQA +Tochserve +csv +datasets +facebook +facebookresearch +fbclid +getitem +lables 
+len +mc +mmfartifacts +EmbeddingBag +TextHandler +overriden +DBUILD +DCMAKE +DSM +EFFT +FasterTransformer +NGC +Transfomer +bytedance +cmake +cp +geforce +libpyt +nvcr +oauthtoken +turing +volta +xlarge +DeepLearningExamples +SpeechSynthesis +WaveGlow's +librosa +numpy +rb +scipy +unidecode +wav +wb +Interoperability +Mtail +Sart +chmod +cnn +mtailtarget +progs +rc +timeseries +xvzf +cuda +jdk +nvidia +torchserve +wsl +yaml +api +config +http +mnist +resnet +Huggingface +PyTorch +benchmarking +bert +captum +grpc +kubeflow +kubernetes +Torchserve's +asg +aws +elb +readme +sdk +apis +powershell +alexnet +deeplabv +densenet +fcn +kfserving +seldon +excuted +findbugs +HTTPs +cors +openssl +prometheus +rsa +ssl +gpus +init +waveglow +hostname +statsd +grafana +kms +userguide +readymade +torchscripted +rcnn +roberta +xlm +Basedataset +mmf +multimodal +preprocessed +batchsize +download +fastertransformer +ngc +deeplearningexamples +mtail +scarpe +NVidia +WaveGlow +huggingface +torchServe +CProfile +KSERVE +apachelounge +args +jmeter +kserve +latencies +snakeviz +codec +loadbalancer +torchserves +xml +Conda +autoscaling +conda +GPUMemoryUsed +GPUMemoryUtilization +GPUUtilization +JSONPatternLayout +MXNetModelServer +QLog +QLogLayout +QLogsetupModelDependencies +abc +dda +patternlayout +qlog +IPEX +ORT +PROFILER +TensorRT +ValueToSet +kineto +profiler +pypi +runtimes +torchprep +GPT +KServe +LMHeadModel +Parallelize +Textgeneration +gpt +kserve +parallelize +tx +xl +DCGAN +DLRM +GAN +NN +Recommender +ScriptModule +Scriptable +TorchRec +TorchScript +Torchrec +dcgan +dlrm +fashiongen +FashionGen +fashionGen +gan +nn +scriptable +torchrec +AVX +Allocator +BLOCKTIME +BertModel +CONDA +JeMalloc +KMP +LD +NUMA +Numa +OMP +OpenMP +PRELOAD +PTMalloc +TCMalloc +Xeon +afeeb +affinitized +allocator +args +eval +gif +hyperthreaded +hyperthreading +inplace +inputPath +intel +iomp +ipex +iter +jemalloc +libiomp +libtcmalloc +numa +numactl +pdt +qconfig +randint +randn +tcmalloc +tunable +unix +unutilized +usr +CONTAINERD +DaemonSet +GKE +Gcloud +Gi +GoogleCloudPlatform +Ki +NFS +PV +PersistentVolume +RWX +STORAGECLASS +VPC +allocatable +auth +autoupgrade +bcc +cidr +clusterIP +creationTimestamp +daemonset +drwx +drwxr +fsSL +gcloud +ggc +gke +googleapis +ip +ipv +jsonpath +kubeconfig +kubectl +lR +mynfs +namespaces +nfs +nodePools +persistentvolume +persistentvolumeclaim +po +preloaded +provisioner +pv +pvc +quickstart +rw +svc +tesla +tty +unformatted +AAAAAElFTkSuQmCC +Autoscaler +BUILDKIT +GOR +InferenceService +Knative +Rollout +inferenceservice +ingressgateway +istio +kfs +knative +loadBalancer +mnt +modelCount +readmes +rollout +serverless +recommender +HandlerTime +customizedMetadata +environ +ContentType +kservev +tobytes +CustomHandler +GH +OSS +PRs +ctx +onnx +ClusterConfig +EBS +EFS +EKS +apiVersion +desiredCapacity +efs +eks +eksctl +instanceTypes +instancesDistribution +maxSize +minSize +namespace +ng +nodeGroups +onDemandBaseCapacity +onDemandPercentageAboveBaseCapacity +pvpod +spotInstancePools +storagehttps +subnet +subnets +vpc +MMS +commandline +filepath +jmx +rampup +requestdefaults +scaleup +tearDown +testplan +JProfiler +JProfiler's +SqueezeNet +TSBenchmark +apos +cProfile +dockerhub +filesystem +filterresults +gradle +homebrew +imageFilePath +jpgc +linuxbrew +mergeresults +modelN +perfmon +urlN +Arg +KFserving +arg +authn +authz +dicts +dockerfiles +enum +eventloop +hashmap +lifecycles +sagemaker +startServer +threadpool +mGPU +socio +gridfs +NLP +TorchScript's +Meta's +criteo 
+personalization +NMTBackTranslate +NMTDualTranslate +nlp +DogCatBreed +DogCatBreedClassification +CloudWatch +LogGroup +TorchServeInferenceURL +TorchServeManagementURL +cloudwatch +keypair +spinup +ReactApp +logdir +tensorboard +DenseNet +pytorchbot +Validator +comparator +validator +validators +Datafile +UI +buildspec +cmds +AKS +PVCs +DockerHub +jq +HPA +HPG +targetValue +totensor +KFServer +TSModelRepository +TorchserveModel +Torchservemodel +kfserve +kfserver +KFModel +marfile +AKS +Balancer +EFK +Liveness +autoscale +datasource +helmignore +lookingup +mountpath +Az +VM +aks +az +ds +eastus +myAKSCluster +myResourceGroup +sc +vm +CODEBUILD +CodeBuild +Dockerfiles +bt +buildtype +codebuild +cudaversion +cudnn +memlock +shm +ulimit +Cresta's +DAGs +Dynabench +Dynaboard +MLFlow +MLOps +MLflow +Operationalize +Sagemaker +Streamlit +Inferentia +opensource +operationalising +Wadhwani +modelarchive +eagermode +AttributeName +AttributeType +DDBEndPoint +DDBSnapshotSerializer +DefaultCredentialsProvider +FS +IndexName +KeySchema +KeyType +PluginsManager +ProjectionType +ProvisionedThroughput +ReadCapacityUnits +SDKs +WriteCapacityUnits +createdOn +createdOnMonth +dynamodb +impl +serializer +servingsdk +snapshotName +behaviour +teardown +tg +udv +dataN +backendgroup +sexualized +ecbe +grayscale +bz +marsgen +efft +envvar +Roadmap +fff +pvd +whl +ss +dn +rn +De +ec +VQA +xxxx +Affero +MinIO +fs +fsspec +minioadmin +pythonic +DeepSpeed +MII +deepspeed +mii +Diffusers +diffusers +AzureML +Largemodels +bigscience +mem +sharded +NVfuser +fuser +ort +sess +dali +BetterTransformer +TransformerEncoder +InferenceTimeInMS +MetricTypes +MetricsCache +TIMM +backends +inductor +Integrations +integrations +UseCases +usecases +Explainability +TorchData +px +svg +nvfuser +noborder +datapipes +tensorrt +vec +torchdata +CodeQL +Dependabot +Snyk +pythonversion +StreamPredictions +LLMs +MPS +mps +deviceIds +rpc +pippy +MBS +MicroBatching +MicroBatchingHandler +QPS +PiPPy +Microbatching +Micro-batching +microbatch +microbatching +DeviceId +PredictionTime +QueueTime +WorkerLoadTime +WorkerName +WorkerThreadTime +MicroSoft +lmi +torchrun +nproc +largemodels +torchpippy +InferenceSession +maxRetryTimeoutInSec +neuronx +AMI +DLAMI +XLA +inferentia +ActionSLAM +statins +ci +chatGPT +Llama +PEFT +LORA +FSDP \ No newline at end of file
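
Usage note for the spellcheck tooling introduced above — a minimal sketch, assuming `aspell` (installed by the script itself via apt-get) and `pyspelling` (a PyPI package the script does not install) are available; the file paths passed as arguments are only examples:

```bash
# Check every Markdown file in the repository; with no arguments the script
# falls back to finding all *.md files.
./scripts/spellcheck.sh

# Check only specific files; each path is forwarded to pyspelling as a
# separate -S source, exactly as the loop in scripts/spellcheck.sh builds it.
./scripts/spellcheck.sh README.md docs/inference.md

# Legitimate terms that aspell flags (project names, CLI flags, etc.) can be
# added one per line to scripts/spellcheck_conf/wordlist.txt.
```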