diff --git a/README.md b/README.md
index d9e9d8354e8e7284789aad635fbc2bfa0e1515b9..09ad7b9a0f0c673965481f204dfd06c607b85423 100644
--- a/README.md
+++ b/README.md
@@ -7,12 +7,12 @@ Llama 2 is a new technology that carries potential risks with use. Testing condu
 
 # Table of Contents
 1. [Quick start](#quick-start)
-2. [Fine-tuning](#fine-tuning)
+2. [Model Conversion](#model-conversion-to-hugging-face)
+3. [Fine-tuning](#fine-tuning)
     - [Single GPU](#single-gpu)
     - [Multi GPU One Node](#multiple-gpus-one-node)
     - [Multi GPU Multi Node](#multi-gpu-multi-node)
-3. [Inference](./docs/inference.md)
-4. [Model Conversion](#model-conversion-to-hugging-face)
+4. [Inference](./docs/inference.md)
 5. [Repository Organization](#repository-organization)
 6. [License and Acceptable Use Policy](#license)
 
@@ -46,6 +46,23 @@ pip install -r requirements.txt
 
 **Please note that the above requirements.txt will install PyTorch 2.0.1 version, in case you want to run FSDP + PEFT, please make sure to install PyTorch nightlies.**
 
+# Model conversion to Hugging Face
+The recipes and notebooks in this folder use the Llama 2 model definition provided by Hugging Face's transformers library.
+
+Given that the original checkpoint resides under models/7B, you can install all requirements and convert the checkpoint with:
+
+```bash
+## Install HuggingFace Transformers from source
+pip freeze | grep transformers ## verify it is version 4.31.0 or higher
+
+## Clone the transformers repo and run the conversion script
+git clone git@github.com:huggingface/transformers.git
+cd transformers
+pip install protobuf
+python src/transformers/models/llama/convert_llama_weights_to_hf.py \
+    --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path
+```
+
 # Fine-tuning
 
 For fine-tuning Llama 2 models for your domain-specific use cases recipes for PEFT, FSDP, PEFT+FSDP have been included along with a few test datasets. For details see [LLM Fine-tuning](./docs/LLM_finetuning.md).
 
@@ -112,20 +129,6 @@ sbatch multi_node.slurm
 
 You can read more about our fine-tuning strategies [here](./docs/LLM_finetuning.md).
 
-# Model conversion to Hugging Face
-The recipes and notebooks in this folder are using the Llama 2 model definition provided by Hugging Face's transformers library.
-
-Given that the original checkpoint resides under models/7B you can install all requirements and convert the checkpoint with:
-
-```bash
-## Install HuggingFace Transformers from source
-pip install git+https://github.com/huggingface/transformers
-cd transformers
-
-python src/transformers/models/llama/convert_llama_weights_to_hf.py \
-    --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir models_hf/7B
-```
-
 # Repository Organization
 This repository is organized in the following way:
 
diff --git a/docs/inference.md b/docs/inference.md
index 144431bb2c6e22a6541bd0bd2ab1e97815137ce0..48a3c8eeedb086c6061bf5c719e9fcfa71a1e69f 100644
--- a/docs/inference.md
+++ b/docs/inference.md
@@ -31,7 +31,7 @@ inference/samsum_prompt.txt
 
 The inference folder also includes a chat completion example, that adds built-in safety features in fine-tuned models to the prompt tokens.
 
 To run the example:
 ```bash
-python chat_completion.py --model_name "PATH/TO/MODEL/7B/" --prompt_file chats.json --quantization --use_auditnlg
+python inference/chat_completion.py --model_name "PATH/TO/MODEL/7B/" --prompt_file inference/chats.json --quantization --use_auditnlg
 ```
diff --git a/scripts/markdown_link_check_config.json b/scripts/markdown_link_check_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c17a7b8739123d141a267bf283907fc076aa2dbe
--- /dev/null
+++ b/scripts/markdown_link_check_config.json
@@ -0,0 +1,24 @@
+{
+    "retryOn429": true,
+    "retryCount": 5,
+    "fallbackRetryDelay": "10s",
+    "httpHeaders": [
+        {
+            "urls": [
+                "https://docs.github.com/",
+                "https://help.github.com/"
+            ],
+            "headers": {
+                "Accept-Encoding": "zstd, br, gzip, deflate"
+            }
+        }
+    ],
+    "ignorePatterns": [
+        {
+            "pattern": "^http(s)?://127.0.0.1.*"
+        },
+        {
+            "pattern": "^http(s)?://localhost.*"
+        }
+    ]
+}
diff --git a/scripts/spellcheck.sh b/scripts/spellcheck.sh
new file mode 100755
index 0000000000000000000000000000000000000000..7f423d5037cafa310a2761e395dc0dba9d270214
--- /dev/null
+++ b/scripts/spellcheck.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Source: https://github.com/pytorch/torchx/blob/main/scripts/spellcheck.sh
+set -ex
+sudo apt-get install -y aspell
+
+if [[ -z "$*" ]]; then
+  sources=$(find . -name '*.md')
+else
+  sources=$@
+fi
+
+sources_arg=""
+for src in $sources; do
+  sources_arg="${sources_arg} -S $src"
+done
+
+if [ ! "$sources_arg" ]; then
+  echo "No files to spellcheck"
+else
+  pyspelling -c scripts/spellcheck_conf/spellcheck.yaml --name Markdown $sources_arg
+fi
diff --git a/scripts/spellcheck_conf/spellcheck.yaml b/scripts/spellcheck_conf/spellcheck.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a9a2df84e3d068badd43752c0fd74164c8150dc
--- /dev/null
+++ b/scripts/spellcheck_conf/spellcheck.yaml
@@ -0,0 +1,22 @@
+matrix:
+- name: Markdown
+  aspell:
+    lang: en
+    d: en_US
+  dictionary:
+    wordlists:
+    - scripts/spellcheck_conf/wordlist.txt
+    output: scripts/spellcheck_conf/wordlist.dic
+    encoding: utf-8
+  pipeline:
+  - pyspelling.filters.context:
+      context_visible_first: true
+      delimiters:
+      - open: '(?s)^ *(?P<open>`{3,})[a-z0-9]*?$'
+        close: '^(?P=open)$'
+      - open: ''
+        content: 'https?://[-a-zA-Z0-9.]+?\.[a-z]{2,6}[-?=&%.0-9a-zA-Z/_#]*'
+        close: ''
+  - pyspelling.filters.markdown:
+      markdown_extensions:
+      - markdown.extensions.extra:
diff --git a/scripts/spellcheck_conf/wordlist.txt b/scripts/spellcheck_conf/wordlist.txt
new file mode 100644
index 0000000000000000000000000000000000000000..77ef92c0954d80f64ea0b34e9a1b11f4afcc6130
--- /dev/null
+++ b/scripts/spellcheck_conf/wordlist.txt
@@ -0,0 +1,1070 @@
+BaseHandler
+ImageNet
+RGB
+TorchServe
+archiver
+dataset
+github
+href
+https
+json
+li
+py
+pytorch
+segmenter
+torchvision
+ul
+usecase
+CUDA
+JDK
+NVIDIA
+WSL
+bashrc
+cd
+githubusercontent
+html
+microsoft
+ol
+openjdk
+OpenJDK
+pre
+psutil
+sentencepiece
+src
+sudo
+torchtext
+ubuntu
+wget
+APIs
+Eg
+MilliSeconds
+URI
+YAML
+dataflow
+func
+lt
+md
+params
+postprocess
+postprocessing
+preprocess
+preprocessing
+serializable
+tbody
+td
+th
+thead
+unregister
+url
+CONFIG
+MNIST
+README
+hotdogs
+ncs
+squeezenet
+vgg
+TorchServe's
+cfg
+configs
+runtime
+yyyyMMddHHmmssSSS
+AWS
+Benchmarking
+Captum
+Grafana
+HuggingFace
+JMeter
+KMS
+Kubeflow
+Kubernetes
+MMF
+contrib
+ddb
+gRPC
+ipynb
+mlflow
+nmt
+performant
+torschripted
+API's
+ASG
+Django
+Dockerfile
+ELB
+LoadBalancer
+OpenAPI
+PyPi
+SDK
+SageMaker
+blockquote
+cli
+cloudformation
+cmd +dev +dir +io +issuecomment +lxning +netty +perf +presigned +tagname +txt +ConfigManager +GPL +NVSMI +Powershell +Redistributable +env +exe +frontend +msi +nodejs +npm +prebuilt +smi +stackoverflow +util +AlexNet +DeepLabV +Densenet +FCN +RCNN +ResNet +Torchscripted +fastrcnn +jpg +maskrcnn +png +KFServing +Seldon +ai +analytics +orchestrator +PMD +backend +checkstyle +cov +gradlew +htmlcov +node.js +pylint +pylintrc +pytest +rcfile +tcort +ut +localhost +myworkflow +wfpredict +Bytearray +CN +CORS +EventLoopGroup +EventLoops +GPUs +JVM +MaxDirectMemorySize +OU +OpenSSL +PCI +PIL +PKCS +PYTHONPATH +Palo +RSA +SSL +WorkerThread +amazonaws +async +batchSize +changeit +dalay +defaultVersion +dep +dname +envvars +genkey +gpu +gz +keyalg +keyout +keysize +keystore +keytool +livebook +marName +maxBatchDelay +maxWorkers +minWorkers +modelName +msec +mycert +mykey +natively +newkey +noop +parameterName +parameterNameN +parameterValue +parameterValueN +pathname +pem +preflight +readthedocs +req +responseTimeout +scalability +storepass +storetype +urls +utf +vmargs +wlm +www +yourdomain +nextPageToken +subfolder +unregistering +workflowDag +workflowName +workflowUrl +Javascript +RESTful +codegen +Args +CustomImageClassifier +DefaultHandlerClass +ImageClassifier +Init +LayerIntegratedGradients +ModelHandler +NDArray +PredictionException +Preprocessed +RuntimeError +Waveglow +cpu +embeddings +fp +ie +isfile +isinstance +jit +kwargs +os +param +pred +pth +pyt +serializedFile +str +tacotron +utils +vCPUs +waveglowpyt +DL +LJO +MiB +cv +dockerd +entrypoint +gpuId +gpuUsage +inferencing +loadedAtStartup +memoryUsage +milli +modelUrl +modelVersion +pid +startTime +Captum's +InferenceAPIsService +ModelServer +br +kf +proto +CPUUtilization +DiskAvailable +DiskUsage +DiskUsed +DiskUtilization +DistanceInKM +HostName +InferenceTime +JSONLayout +LoopCount +MemoryAvailable +MemoryUsed +MemoryUtilization +MetricName +SizeOfImage +StatsD +appender +dimN +etsy +formatter +idx +img +kB +DescribeModel +ListModels +RegisterModel +ScaleWorker +SetDefault +UnregisterModel +gRPCs +grpcio +mkdir +protobuf +protoc +repo +BackendWorker +ConversionPattern +Dlog +MaxBackupIndex +MaxFileSize +PatternLayout +RollingFileAppender +WorkerLifeCycle +apache +nnvm +stderr +stdout +ConflictStatusException +DownloadModelException +InvalidSnapshotException +ModelNotFoundException +NoSuchMethodError +ServiceUnavailableException +lang +mb +ntl +PrometheusServer +globoff +noopversioned +systemctl +uuid +yml +AWSS +AmazonS +IAM +ManagementAPIsService +ReadOnlyAccess +UserGuide +UsingKMSEncryption +acknowledgement +macOS +sse +fairseq +libs +mv +pretrained +publically +ready-made +tmp +torchscript +torchvision's +handerl +Bitte +Bonjour +Hallo +Hause +Ich +Ihnen +Ihren +Je +Namen +Sie +TransformerEn +Und +WMT +Wie +allez +arxiv +auf +bien +chez +danke +dataclasses +dich +du +english +erinnere +et +fb +geht +german +komm +kommst +le +leid +läuft +m'excuser +merci +mich +mir +monde +möglich +nFine +nIt’s +nPlease +nach +ne +nicht +nom +prie +quand +rentrerez +selbst +sich +sind +souviens +tôt +va +venir +votre +vous +wann +warte +Ça +BERTQA +BERTSeqClassification +BERTTokenClassification +MFreidank +RoBERTA +XLM +distilbert +does't +finetuning +num +tc +tokenizer +vidhya +vocabs +AutoConfig +Huggingface's +ScriptFunction +transfomers +BBM +BaseDataset +BaseDatasetBuilder +BaseModel +FNSio +MMFTransformer +MultiModal +OmegaConfing +Pyav +REU +TextCaps +TextVQA +Tochserve +csv +datasets +facebook +facebookresearch +fbclid +getitem +lables 
+len +mc +mmfartifacts +EmbeddingBag +TextHandler +overriden +DBUILD +DCMAKE +DSM +EFFT +FasterTransformer +NGC +Transfomer +bytedance +cmake +cp +geforce +libpyt +nvcr +oauthtoken +turing +volta +xlarge +DeepLearningExamples +SpeechSynthesis +WaveGlow's +librosa +numpy +rb +scipy +unidecode +wav +wb +Interoperability +Mtail +Sart +chmod +cnn +mtailtarget +progs +rc +timeseries +xvzf +cuda +jdk +nvidia +torchserve +wsl +yaml +api +config +http +mnist +resnet +Huggingface +PyTorch +benchmarking +bert +captum +grpc +kubeflow +kubernetes +Torchserve's +asg +aws +elb +readme +sdk +apis +powershell +alexnet +deeplabv +densenet +fcn +kfserving +seldon +excuted +findbugs +HTTPs +cors +openssl +prometheus +rsa +ssl +gpus +init +waveglow +hostname +statsd +grafana +kms +userguide +readymade +torchscripted +rcnn +roberta +xlm +Basedataset +mmf +multimodal +preprocessed +batchsize +download +fastertransformer +ngc +deeplearningexamples +mtail +scarpe +NVidia +WaveGlow +huggingface +torchServe +CProfile +KSERVE +apachelounge +args +jmeter +kserve +latencies +snakeviz +codec +loadbalancer +torchserves +xml +Conda +autoscaling +conda +GPUMemoryUsed +GPUMemoryUtilization +GPUUtilization +JSONPatternLayout +MXNetModelServer +QLog +QLogLayout +QLogsetupModelDependencies +abc +dda +patternlayout +qlog +IPEX +ORT +PROFILER +TensorRT +ValueToSet +kineto +profiler +pypi +runtimes +torchprep +GPT +KServe +LMHeadModel +Parallelize +Textgeneration +gpt +kserve +parallelize +tx +xl +DCGAN +DLRM +GAN +NN +Recommender +ScriptModule +Scriptable +TorchRec +TorchScript +Torchrec +dcgan +dlrm +fashiongen +FashionGen +fashionGen +gan +nn +scriptable +torchrec +AVX +Allocator +BLOCKTIME +BertModel +CONDA +JeMalloc +KMP +LD +NUMA +Numa +OMP +OpenMP +PRELOAD +PTMalloc +TCMalloc +Xeon +afeeb +affinitized +allocator +args +eval +gif +hyperthreaded +hyperthreading +inplace +inputPath +intel +iomp +ipex +iter +jemalloc +libiomp +libtcmalloc +numa +numactl +pdt +qconfig +randint +randn +tcmalloc +tunable +unix +unutilized +usr +CONTAINERD +DaemonSet +GKE +Gcloud +Gi +GoogleCloudPlatform +Ki +NFS +PV +PersistentVolume +RWX +STORAGECLASS +VPC +allocatable +auth +autoupgrade +bcc +cidr +clusterIP +creationTimestamp +daemonset +drwx +drwxr +fsSL +gcloud +ggc +gke +googleapis +ip +ipv +jsonpath +kubeconfig +kubectl +lR +mynfs +namespaces +nfs +nodePools +persistentvolume +persistentvolumeclaim +po +preloaded +provisioner +pv +pvc +quickstart +rw +svc +tesla +tty +unformatted +AAAAAElFTkSuQmCC +Autoscaler +BUILDKIT +GOR +InferenceService +Knative +Rollout +inferenceservice +ingressgateway +istio +kfs +knative +loadBalancer +mnt +modelCount +readmes +rollout +serverless +recommender +HandlerTime +customizedMetadata +environ +ContentType +kservev +tobytes +CustomHandler +GH +OSS +PRs +ctx +onnx +ClusterConfig +EBS +EFS +EKS +apiVersion +desiredCapacity +efs +eks +eksctl +instanceTypes +instancesDistribution +maxSize +minSize +namespace +ng +nodeGroups +onDemandBaseCapacity +onDemandPercentageAboveBaseCapacity +pvpod +spotInstancePools +storagehttps +subnet +subnets +vpc +MMS +commandline +filepath +jmx +rampup +requestdefaults +scaleup +tearDown +testplan +JProfiler +JProfiler's +SqueezeNet +TSBenchmark +apos +cProfile +dockerhub +filesystem +filterresults +gradle +homebrew +imageFilePath +jpgc +linuxbrew +mergeresults +modelN +perfmon +urlN +Arg +KFserving +arg +authn +authz +dicts +dockerfiles +enum +eventloop +hashmap +lifecycles +sagemaker +startServer +threadpool +mGPU +socio +gridfs +NLP +TorchScript's +Meta's +criteo 
+personalization +NMTBackTranslate +NMTDualTranslate +nlp +DogCatBreed +DogCatBreedClassification +CloudWatch +LogGroup +TorchServeInferenceURL +TorchServeManagementURL +cloudwatch +keypair +spinup +ReactApp +logdir +tensorboard +DenseNet +pytorchbot +Validator +comparator +validator +validators +Datafile +UI +buildspec +cmds +AKS +PVCs +DockerHub +jq +HPA +HPG +targetValue +totensor +KFServer +TSModelRepository +TorchserveModel +Torchservemodel +kfserve +kfserver +KFModel +marfile +AKS +Balancer +EFK +Liveness +autoscale +datasource +helmignore +lookingup +mountpath +Az +VM +aks +az +ds +eastus +myAKSCluster +myResourceGroup +sc +vm +CODEBUILD +CodeBuild +Dockerfiles +bt +buildtype +codebuild +cudaversion +cudnn +memlock +shm +ulimit +Cresta's +DAGs +Dynabench +Dynaboard +MLFlow +MLOps +MLflow +Operationalize +Sagemaker +Streamlit +Inferentia +opensource +operationalising +Wadhwani +modelarchive +eagermode +AttributeName +AttributeType +DDBEndPoint +DDBSnapshotSerializer +DefaultCredentialsProvider +FS +IndexName +KeySchema +KeyType +PluginsManager +ProjectionType +ProvisionedThroughput +ReadCapacityUnits +SDKs +WriteCapacityUnits +createdOn +createdOnMonth +dynamodb +impl +serializer +servingsdk +snapshotName +behaviour +teardown +tg +udv +dataN +backendgroup +sexualized +ecbe +grayscale +bz +marsgen +efft +envvar +Roadmap +fff +pvd +whl +ss +dn +rn +De +ec +VQA +xxxx +Affero +MinIO +fs +fsspec +minioadmin +pythonic +DeepSpeed +MII +deepspeed +mii +Diffusers +diffusers +AzureML +Largemodels +bigscience +mem +sharded +NVfuser +fuser +ort +sess +dali +BetterTransformer +TransformerEncoder +InferenceTimeInMS +MetricTypes +MetricsCache +TIMM +backends +inductor +Integrations +integrations +UseCases +usecases +Explainability +TorchData +px +svg +nvfuser +noborder +datapipes +tensorrt +vec +torchdata +CodeQL +Dependabot +Snyk +pythonversion +StreamPredictions +LLMs +MPS +mps +deviceIds +rpc +pippy +MBS +MicroBatching +MicroBatchingHandler +QPS +PiPPy +Microbatching +Micro-batching +microbatch +microbatching +DeviceId +PredictionTime +QueueTime +WorkerLoadTime +WorkerName +WorkerThreadTime +MicroSoft +lmi +torchrun +nproc +largemodels +torchpippy +InferenceSession +maxRetryTimeoutInSec +neuronx +AMI +DLAMI +XLA +inferentia +ActionSLAM +statins +ci +chatGPT +Llama +PEFT +LORA +FSDP \ No newline at end of file
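
Usage note for the spellcheck tooling introduced above — a minimal sketch, assuming `aspell` (installed by the script itself via apt-get) and `pyspelling` (a PyPI package the script does not install) are available; the file paths passed as arguments are only examples:

```bash
# Check every Markdown file in the repository; with no arguments the script
# falls back to finding all *.md files.
./scripts/spellcheck.sh

# Check only specific files; each path is forwarded to pyspelling as a
# separate -S source, exactly as the loop in scripts/spellcheck.sh builds it.
./scripts/spellcheck.sh README.md docs/inference.md

# Legitimate terms that aspell flags (project names, CLI flags, etc.) can be
# added one per line to scripts/spellcheck_conf/wordlist.txt.
```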