Scripts for setting up PyTorch for AMD ROCm
ROCm install script
#!/bin/bash
# ROCm 6.1.3 setup for Ubuntu 20.04.6 and Ubuntu 22.04.4 (desktop and server builds)
# =============================================================================
# This script installs ROCm 6.1.3 on Ubuntu 20.04.6 or 22.04.4, automatically
# downloading the matching installer package and running it in non-interactive mode.
#
# Requirements
# OS: Ubuntu Server 20.04.6 LTS (Focal Fossa) or Ubuntu Server 22.04.4 LTS (Jammy Jellyfish)
# Kernel: 5.15.0-117 or 6.8
#
# Software
# ROCm(TM) Platform: 6.1.3 / rocBLAS 4.1.2.60103 / hipBLAS 2.1.0.60103
# Release: https://rocm.docs.amd.com/en/latest/about/release-notes.html
# Tools: git (version control system for tracking changes in files)
#        htop (monitoring - dynamic overview of running processes)
#        ncdu (NCurses disk usage utility with a text-based interface for viewing disk usage)
#        cmake (open-source, cross-platform family of tools to build, test and package software)
#        libmsgpack-dev (development package for MessagePack, a binary serialization format that is efficient in both size and speed)
#        vLLM, Flash Attention 2 (CK + Triton) from source
#        rocBLAS from source
# Author: Joerg Roskowetz
# Script process time: ~10 minutes (depending on system and internet configuration)
# Date: July 30th 2024
# global stdout method
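# prints its argument in bold cyan (\033[1;36m), leaves the terminal in bold
# magenta for subsequent output, and sleeps 4 s so each status line stays readable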
function print () {
printf "\033[1;36m\t$1\033[1;35m\n"; sleep 4
}
clear &&
print '\nAMD ROCm 6.1.3 installation, auto-detecting the installed Ubuntu version (20.04.x or 22.04.x, desktop and server)\n'
print 'Linux OS System Update ...\n'
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt -y upgrade
print '\nDone\n'
install_focal() {
print '\nUbuntu 20.04.x (focal) installation method has been set.\n'
# Download the installer script
wget https://repo.radeon.com/amdgpu-install/6.1.3/ubuntu/focal/amdgpu-install_6.1.60103-1_all.deb
}
install_jellyfish() {
print '\nUbuntu 22.04.x (jammy jellyfish) installation method has been set.\n'
# Download the installer script
wget https://repo.radeon.com/amdgpu-install/6.1.3/ubuntu/jammy/amdgpu-install_6.1.60103-1_all.deb
# install latest headers and static library files necessary for building C++ programs which use libstdc++
sudo DEBIAN_FRONTEND=noninteractive apt-get install libstdc++-12-dev --yes
pip3 install joblib
}
# Check if supported Ubuntu release exists
if command -v lsb_release > /dev/null; then
UBUNTU_CODENAME=$(lsb_release -c -s)
if [ "$UBUNTU_CODENAME" = "focal" ]; then
print '\nDetected Ubuntu Focal Fossa (20.04.x).\n'
install_focal
elif [ "$UBUNTU_CODENAME" = "jammy" ]; then
print '\nDetected Ubuntu Jammy Jellyfish (22.04.x).\n'
install_jellyfish
else
print '\nUnsupported Ubuntu version - aborting.\n'
exit 1
fi
else
print '\nlsb_release command not found. Unable to determine the Ubuntu version - aborting.\n'
exit 1
fi
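# Fallback sketch (an assumption, not part of the tested flow): if lsb_release
# is missing, the codename can be read from /etc/os-release instead:
#   . /etc/os-release && UBUNTU_CODENAME="$VERSION_CODENAME"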
# Install with "default" settings (no interaction)
sudo DEBIAN_FRONTEND=noninteractive apt install ./amdgpu-install_6.1.60103-1_all.deb --yes
# Installing multiple use cases including ROCm 6.1.3, OCL and HIP SDK
print '\nInstalling ROCm 6.1.3 environment with graphics, rocm and hiplibsdk parameter ...\n'
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive amdgpu-install --usecase=graphics,rocm,hiplibsdk --yes
# Groups setup and ROCm/OCL path in global *.icd file
# Add the library path to the global amdocl64.icd file
echo "/opt/rocm/lib/libamdocl64.so" | sudo tee /etc/OpenCL/vendors/amdocl64.icd
# Add the current user to the "video" and "render" group
sudo usermod -a -G video,render ${SUDO_USER:-$USER}
sudo usermod -aG sudo ${SUDO_USER:-$USER} # add the user to the sudo group (important e.g. to compile vLLM and Flash Attention in a pip environment)
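# Group changes take effect at the next login; a quick check after re-login
# (a minimal sketch):
#   groups "${SUDO_USER:-$USER}"   # should now list video, render and sudo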
# Install tools: git, htop, cmake, python3.10-venv, libmsgpack-dev and ncdu (NCurses disk usage utility, cf. "df -h")
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y git htop cmake python3.10-venv libmsgpack-dev ncdu
# Download rocBLAS for ROCm 6.1 from source
git clone -b release/rocm-rel-6.1 https://github.com/ROCm/rocBLAS.git
# Download vLLM repository (pip3 install -U -r requirements-rocm.txt)
git clone https://github.com/vllm-project/vllm.git
# Download Flash Attention 2 (CK - composable kernel) repository from source
git clone https://github.com/ROCm/flash-attention.git
# Download Flash Attention 2 (Triton) repository from source
git clone https://github.com/ROCm/triton.git
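# A hedged sketch for building the CK flash-attention fork after the reboot
# (GPU_ARCHS is the architecture variable used by the ROCm fork's build;
# swap in your gfx ID, e.g. gfx942 for MI300X):
#   cd flash-attention && GPU_ARCHS=gfx90a pip3 install .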
print '\nFinished ROCm 6.1.3 environment installation and setup.\n'
print 'After the reboot, test your installation by typing "rocminfo", "clinfo" or "rocm-smi".\n'
print 'You can compile rocBLAS for your installed gfx ID with "cd rocBLAS && sudo ./install.sh".\n'
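# A minimal sketch for that rocBLAS build (assumes rocminfo works after the
# reboot; install.sh's -a/--architecture flag selects the build target):
#   GFX_ID=$(rocminfo | grep -o -m1 'gfx[0-9a-f]*')
#   cd rocBLAS && sudo ./install.sh -a "$GFX_ID"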
# reboot option
print 'Reboot system now (recommended)? (y/n)'
read -r q
if [ "$q" = "y" ]; then
for i in 3 2 1
do
printf "Reboot in $i ...\r"; sleep 1
done
sudo reboot
fi
Install torch, vLLM and Flash Attention 2
#!/bin/bash
# Machine Learning (ML) module installing vLLM 0.5.3 + FA 2.0.4 + PyTorch 2.5.0 @ROCm 6.1 and an 8B or 70B LLM chatbot based on LLaMA 3
# =============================================================================
#
# Requirements
# OS: Ubuntu Server 20.04.6 LTS (Focal Fossa) or Ubuntu Server 22.04.4 LTS (Jammy Jellyfish)
# Kernel: 5.15.0-117 and 6.8
# ROCm 6.1.3 (tested)
#
# Tools: vLLM 0.5.3.post1+rocm614 compiled from source
# Flash Attention 2.0.4 compiled from source
# Torch 2.5.0.dev20240710+rocm6.1
#
# Author: Joerg Roskowetz
# Script process time: ~20 minutes (depending on system and internet configuration)
# Date: July 29th 2024
# global stdout method
function print () {
printf "\033[1;36m\t$1\033[1;35m\n"; sleep 4
}
clear &&
print '\nBuilding vLLM 0.5.3 and Flash Attention 2.0.4 based on PyTorch 2.5.0 from source, then downloading the VAGOsolutions LLaMA 3 8B chatbot model and Python script\n'
print '\nInstalling Torch 2.5.0\n'
pip3 install --upgrade pip
pip3 install joblib
# Install PyTorch
pip3 uninstall torch -y # in case there is an older version
# Original pin (July 2024), superseded by the September 2024 nightly below:
# pip3 install --no-cache-dir --pre torch==2.5.0.dev20240710 --index-url https://download.pytorch.org/whl/nightly/rocm6.1
# Updated September 2024
pip3 install --no-cache-dir --pre torch==2.5.0.dev20240912+rocm6.1 --index-url https://download.pytorch.org/whl/nightly/rocm6.1
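# Optional verification (a minimal check): the ROCm nightly should import and
# report a HIP runtime; torch.cuda.is_available() maps to HIP on ROCm builds
python3 -c 'import torch; print(torch.__version__, torch.version.hip, torch.cuda.is_available())'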
# Build & install AMD SMI
pip3 install /opt/rocm/share/amd_smi
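# Sanity check (assumption: the package installed above is importable as "amdsmi"):
python3 -c 'import amdsmi; print("amdsmi OK")'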
# Install dependencies
pip3 install --upgrade numba scipy huggingface-hub[cli]
pip3 install "numpy<2"
print '\nDownloading vllm repository and installing requirements\n'
# Download vLLM repository (pip3 install -U -r requirements-rocm.txt)
cd && rm -rf vllm # remove any previous checkout; -f avoids an error if none exists
git clone https://github.com/vllm-project/vllm.git
cd && cd vllm
pip3 install -r requirements-rocm.txt
# Apply the ROCm 6.1 patch (requires root): install the patched libamdhip64
# from the ROCm/vllm repository and remove the copy bundled with the torch wheel
sudo wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib
rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so*
print '\nBuilding vLLM from source for MI210 (gfx90a) - for a different GPU such as MI300X, set the gfx ID to gfx942 in the script\n'
# Build vLLM for MI210/MI250/MI300.
# export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
export PYTORCH_ROCM_ARCH="gfx90a" # Build vLLM for MI210
cd && cd vllm
python3 setup.py develop
pip3 uninstall jinja2 -y
pip3 install 'jinja2>=3.1.0' # quoted so the shell does not treat >= as a redirect
# chatbot script
cd && echo 'import time
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

# Initialize the model and tokenizer
model = LLM("VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct")
tokenizer = AutoTokenizer.from_pretrained("VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct")

def chatbot_prompt(user_message, min_tokens=256, max_tokens=256, temperature=0.7, top_p=0.8, repetition_penalty=1.05):
    # Define the conversation history
    messages = [{"role": "user", "content": user_message}]
    # Format the prompt
    formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Set sampling parameters
    sampling_params = SamplingParams(temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty, max_tokens=max_tokens)
    # Measure latency for generating the output
    start_time = time.time()
    output = model.generate([formatted_prompt] * 1, sampling_params=sampling_params)
    end_time = time.time()
    latency = end_time - start_time
    # Extract and return the generated text
    response_text = ""
    for request_output in output:
        for completion in request_output.outputs:
            response_text += completion.text
    return response_text, latency

# Example usage
user_messages = [
    "Tell me about the impact of AI on modern society.",
    "How does machine learning differ from traditional programming?"
]
for user_message in user_messages:
    response, latency = chatbot_prompt(user_message)
    print(f"User: {user_message}")
    print(f"Chatbot: {response}")
    print(f"Latency: {latency:.2f} seconds\n")
' > chatbot_vllm.py
print '\nDone\n'
print '\nYour vLLM version can be verified with "pip3 show vllm"\n'
print '\nStart the chatbot with "python3 chatbot_vllm.py" - the script is set to run 1 batch, 256 max tokens and temperature 0.7.\n'