Scripts for setting up PyTorch for AMD ROCm
ROCm install script
#!/bin/bash
# ROCm 6.1.3 setup for Ubuntu 20.04.6 and Ubuntu 22.04.4 (desktop and server builds)
# =============================================================================
# This script installs ROCm 6.1.3 on Ubuntu 20.04.6 or 22.04.4, automatically
# downloading the matching installer package and running it in non-interactive mode.
#
# Requirements
# OS: Ubuntu Server 20.04.6 LTS (Focal Fossa) or Ubuntu Server 22.04.4 LTS (Jammy Jellyfish)
# Kernel: 5.15.0-117 or 6.8
#
# Software
# ROCm(TM) Platform: 6.1.3 / rocBLAS 4.1.2.60103 / hipBLAS 2.1.0.60103
# Release: https://rocm.docs.amd.com/en/latest/about/release-notes.html
# Tools: git (version control system for tracking changes in files)
#        htop (monitoring - dynamic overview of running processes)
#        ncdu (NCurses disk usage utility with a text-based interface for viewing disk usage)
#        cmake (open-source, cross-platform family of tools to build, test and package software)
#        libmsgpack-dev (development package for MessagePack, a binary serialization format that is efficient in both size and speed)
#        vLLM, Flash Attention 2 (CK + Triton) from source
#        rocBLAS from source
# Author: Joerg Roskowetz
# Script process time: ~10 minutes (depending on system and internet configuration)
# Date: July 30th 2024
# global stdout method
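# prints its argument in bold cyan (\033[1;36m), leaves the terminal in bold
# magenta for subsequent output, and sleeps 4 s so each status line stays readable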
function print () {
printf "\033[1;36m\t$1\033[1;35m\n"; sleep 4
}
clear &&
print '\nAMD ROCm 6.1.3 installation, auto-detecting the installed Ubuntu version (20.04.x or 22.04.x, desktop and server)\n'
print 'Linux OS System Update ...\n'
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt -y upgrade
print '\nDone\n'
install_focal() {
print '\nUbuntu 20.04.x (focal) installation method has been set.\n'
# Download the installer script
wget https://repo.radeon.com/amdgpu-install/6.1.3/ubuntu/focal/amdgpu-install_6.1.60103-1_all.deb
}
install_jellyfish() {
print '\nUbuntu 22.04.x (jammy jellyfish) installation method has been set.\n'
# Download the installer script
wget https://repo.radeon.com/amdgpu-install/6.1.3/ubuntu/jammy/amdgpu-install_6.1.60103-1_all.deb
# install latest headers and static library files necessary for building C++ programs which use libstdc++
sudo DEBIAN_FRONTEND=noninteractive apt-get install libstdc++-12-dev --yes
pip3 install joblib
}
# Check if supported Ubuntu release exists
if command -v lsb_release > /dev/null; then
UBUNTU_CODENAME=$(lsb_release -c -s)
if [ "$UBUNTU_CODENAME" = "focal" ]; then
print '\nDetected Ubuntu Focal Fossa (20.04.x).\n'
install_focal
elif [ "$UBUNTU_CODENAME" = "jammy" ]; then
print '\nDetected Ubuntu Jammy Jellyfish (22.04.x).\n'
install_jellyfish
else
print '\nUnsupported Ubuntu version - aborting.\n'
exit 1
fi
else
print '\nlsb_release command not found. Unable to determine the Ubuntu version - aborting.\n'
exit 1
fi
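# Fallback sketch (an assumption, not part of the tested flow): if lsb_release
# is missing, the codename can be read from /etc/os-release instead:
#   . /etc/os-release && UBUNTU_CODENAME="$VERSION_CODENAME"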
# Install with "default" settings (no interaction)
sudo DEBIAN_FRONTEND=noninteractive apt install ./amdgpu-install_6.1.60103-1_all.deb --yes
# Installing multiple use cases including ROCm 6.1.3, OCL and HIP SDK
print '\nInstalling ROCm 6.1.3 environment with graphics, rocm and hiplibsdk parameter ...\n'
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive amdgpu-install --usecase=graphics,rocm,hiplibsdk --yes
# Groups setup and ROCm/OCL path in global *.icd file
# Add the library path to the global amdocl64.icd file
echo "/opt/rocm/lib/libamdocl64.so" | sudo tee /etc/OpenCL/vendors/amdocl64.icd
# Add the current user to the "video" and "render" group
sudo usermod -a -G video,render ${SUDO_USER:-$USER}
sudo usermod -aG sudo ${SUDO_USER:-$USER} # add the user to the sudo group (important e.g. to compile vLLM and Flash Attention in a pip environment)
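# Group changes take effect at the next login; a quick check after re-login
# (a minimal sketch):
#   groups "${SUDO_USER:-$USER}"   # should now list video, render and sudo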
# Install tools: git, htop, cmake, python3.10-venv, libmsgpack-dev and ncdu (NCurses disk usage utility, cf. "df -h")
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y git htop cmake python3.10-venv libmsgpack-dev ncdu
# Download rocBLAS for ROCm 6.1 from source
git clone -b release/rocm-rel-6.1 https://github.com/ROCm/rocBLAS.git
# Download vLLM repository (pip3 install -U -r requirements-rocm.txt)
git clone https://github.com/vllm-project/vllm.git
# Download Flash Attention 2 (CK - composable kernel) repository from source
git clone https://github.com/ROCm/flash-attention.git
# Download Flash Attention 2 (Triton) repository from source
git clone https://github.com/ROCm/triton.git
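# A hedged sketch for building the CK flash-attention fork after the reboot
# (GPU_ARCHS is the architecture variable used by the ROCm fork's build;
# swap in your gfx ID, e.g. gfx942 for MI300X):
#   cd flash-attention && GPU_ARCHS=gfx90a pip3 install .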
print '\nFinished ROCm 6.1.3 environment installation and setup.\n'
print 'After the reboot, test your installation by typing "rocminfo", "clinfo" or "rocm-smi".\n'
print 'You can compile rocBLAS for your installed gfx ID with "cd rocBLAS && sudo ./install.sh".\n'
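# A minimal sketch for that rocBLAS build (assumes rocminfo works after the
# reboot; install.sh's -a/--architecture flag selects the build target):
#   GFX_ID=$(rocminfo | grep -o -m1 'gfx[0-9a-f]*')
#   cd rocBLAS && sudo ./install.sh -a "$GFX_ID"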
# reboot option
print 'Reboot system now (recommended)? (y/n)'
read -r q
if [ "$q" = "y" ]; then
for i in 3 2 1
do
printf "Reboot in $i ...\r"; sleep 1
done
sudo reboot
fi
Install torch, vLLM and Flash Attention 2
#!/bin/bash
# Machine Learning (ML) module installing vLLM 0.5.3 + FA 2.0.4 + PyTorch 2.5.0 @ROCm 6.1 and an 8B or 70B LLM chatbot based on LLaMA 3
# =============================================================================
#
# Requirements
# OS: Ubuntu Server 20.04.6 LTS (Focal Fossa) or Ubuntu Server 22.04.4 LTS (Jammy Jellyfish)
# Kernel: 5.15.0-117 and 6.8
# ROCm 6.1.3 (tested)
#
# Tools: vLLM 0.5.3.post1+rocm614 compiled from source
# Flash Attention 2.0.4 compiled from source
# Torch 2.5.0.dev20240710+rocm6.1
#
# Author: Joerg Roskowetz
# Script process time: ~20 minutes (depending on system and internet configuration)
# Date: July 29th 2024
# global stdout method
function print () {
printf "\033[1;36m\t$1\033[1;35m\n"; sleep 4
}
clear &&
print '\nBuilding vLLM 0.5.3 and Flash Attention 2.0.4 based on PyTorch 2.5.0 from source, then downloading the VAGOsolutions LLaMA 3 8B chatbot model and Python script\n'
print '\nInstalling Torch 2.5.0\n'
pip3 install --upgrade pip
pip3 install joblib
# Install PyTorch
pip3 uninstall torch -y # in case there is an older version
# Original pin (July 2024), superseded by the September 2024 nightly below:
# pip3 install --no-cache-dir --pre torch==2.5.0.dev20240710 --index-url https://download.pytorch.org/whl/nightly/rocm6.1
# Updated September 2024
pip3 install --no-cache-dir --pre torch==2.5.0.dev20240912+rocm6.1 --index-url https://download.pytorch.org/whl/nightly/rocm6.1
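# Optional verification (a minimal check): the ROCm nightly should import and
# report a HIP runtime; torch.cuda.is_available() maps to HIP on ROCm builds
python3 -c 'import torch; print(torch.__version__, torch.version.hip, torch.cuda.is_available())'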
# Build & install AMD SMI
pip3 install /opt/rocm/share/amd_smi
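# Sanity check (assumption: the package installed above is importable as "amdsmi"):
python3 -c 'import amdsmi; print("amdsmi OK")'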
# Install dependencies
pip3 install --upgrade numba scipy huggingface-hub[cli]
pip3 install "numpy<2"
print '\nDownloading vllm repository and installing requirements\n'
# Download vLLM repository (pip3 install -U -r requirements-rocm.txt)
cd && rm -rf vllm # remove any previous checkout; -f avoids an error if none exists
git clone https://github.com/vllm-project/vllm.git
cd && cd vllm
pip3 install -r requirements-rocm.txt
# Apply the ROCm 6.1 patch (requires root): install the patched libamdhip64
# from the ROCm/vllm repository and remove the copy bundled with the torch wheel
sudo wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib
rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so*
print '\nBuilding vLLM from source for MI210 (gfx90a) - for a different GPU such as MI300X, set the gfx ID to gfx942 in the script\n'
# Build vLLM for MI210/MI250/MI300.
# export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
export PYTORCH_ROCM_ARCH="gfx90a" # Build vLLM for MI210
cd && cd vllm
python3 setup.py develop
pip3 uninstall jinja2 -y
pip3 install 'jinja2>=3.1.0' # quoted so the shell does not treat >= as a redirect
# chatbot script
cd && echo 'import time
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

# Initialize the model and tokenizer
model = LLM("VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct")
tokenizer = AutoTokenizer.from_pretrained("VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct")

def chatbot_prompt(user_message, min_tokens=256, max_tokens=256, temperature=0.7, top_p=0.8, repetition_penalty=1.05):
    # Define the conversation history
    messages = [{"role": "user", "content": user_message}]
    # Format the prompt
    formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Set sampling parameters
    sampling_params = SamplingParams(temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty, max_tokens=max_tokens)
    # Measure latency for generating the output
    start_time = time.time()
    output = model.generate([formatted_prompt] * 1, sampling_params=sampling_params)
    end_time = time.time()
    latency = end_time - start_time
    # Extract and return the generated text
    response_text = ""
    for request_output in output:
        for completion in request_output.outputs:
            response_text += completion.text
    return response_text, latency

# Example usage
user_messages = [
    "Tell me about the impact of AI on modern society.",
    "How does machine learning differ from traditional programming?"
]
for user_message in user_messages:
    response, latency = chatbot_prompt(user_message)
    print(f"User: {user_message}")
    print(f"Chatbot: {response}")
    print(f"Latency: {latency:.2f} seconds\n")
' > chatbot_vllm.py
print '\nDone\n'
print '\nYour vLLM version can be verified with "pip3 show vllm"\n'
print '\nStart the chatbot with "python3 chatbot_vllm.py" - the script is set to run 1 batch, 256 max tokens and temperature 0.7.\n'