Add ROCm build for faster-whisper with Wyoming endpoint
Multi-stage Dockerfile builds CTranslate2 v4.0.0 with HIP/ROCm support targeting gfx1030/gfx1031 (RX 6000 series), then installs faster-whisper and wyoming-faster-whisper on top for a Wyoming ASR server on port 10300. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
89
Dockerfile
Normal file
89
Dockerfile
Normal file
@@ -0,0 +1,89 @@
|
||||
ARG ROCM_VERSION=6.2
|
||||
ARG GPU_ARCH="gfx1030;gfx1031"
|
||||
ARG ONEDNN_VERSION=3.1.1
|
||||
|
||||
# ── Stage 1: Build CTranslate2 wheel ─────────────────────────────────────────
|
||||
FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION} AS builder
|
||||
|
||||
ARG GPU_ARCH
|
||||
ARG ONEDNN_VERSION
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
git cmake cmake-curses-gui \
|
||||
libopenblas-dev wget \
|
||||
python3-dev python3-pip \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Build oneDNN as a static library (CPU fallback / conv ops)
|
||||
WORKDIR /build
|
||||
RUN wget -q https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \
|
||||
tar xf v${ONEDNN_VERSION}.tar.gz && \
|
||||
rm v${ONEDNN_VERSION}.tar.gz && \
|
||||
cd oneDNN-${ONEDNN_VERSION} && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release \
|
||||
-DONEDNN_LIBRARY_TYPE=STATIC \
|
||||
-DONEDNN_BUILD_EXAMPLES=OFF \
|
||||
-DONEDNN_BUILD_TESTS=OFF \
|
||||
-DONEDNN_ENABLE_WORKLOAD=INFERENCE \
|
||||
"-DONEDNN_ENABLE_PRIMITIVE=CONVOLUTION;REORDER" \
|
||||
-DONEDNN_BUILD_GRAPH=OFF . && \
|
||||
make -j$(nproc) install && \
|
||||
cd /build && rm -rf oneDNN-${ONEDNN_VERSION}
|
||||
|
||||
# Build CTranslate2 with ROCm/HIP
|
||||
ENV CTRANSLATE2_ROOT=/opt/ctranslate2
|
||||
COPY patches/ct2_4.0.0_rocm.patch /tmp/
|
||||
|
||||
RUN git clone --branch v4.0.0 --depth 1 \
|
||||
https://github.com/OpenNMT/CTranslate2.git /build/CTranslate2 && \
|
||||
cd /build/CTranslate2 && \
|
||||
git submodule update --init --recursive && \
|
||||
git apply /tmp/ct2_4.0.0_rocm.patch && \
|
||||
mkdir build && cd build && \
|
||||
cmake -DCMAKE_INSTALL_PREFIX=${CTRANSLATE2_ROOT} \
|
||||
-DWITH_CUDA=ON \
|
||||
-DWITH_CUDNN=ON \
|
||||
-DWITH_MKL=OFF \
|
||||
-DWITH_DNNL=ON \
|
||||
-DOPENMP_RUNTIME=COMP \
|
||||
-DCMAKE_HIP_ARCHITECTURES="${GPU_ARCH}" \
|
||||
-DGPU_TARGETS="${GPU_ARCH}" \
|
||||
-DAMDGPU_TARGETS="${GPU_ARCH}" \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DGPU_RUNTIME=HIP \
|
||||
-DWITH_OPENBLAS=ON \
|
||||
-DENABLE_CPU_DISPATCH=OFF \
|
||||
-DBUILD_TESTS=OFF .. && \
|
||||
make -j$(nproc) install
|
||||
|
||||
# Build Python wheel
|
||||
RUN cd /build/CTranslate2/python && \
|
||||
pip install --no-cache-dir -r install_requirements.txt && \
|
||||
python3 setup.py bdist_wheel --dist-dir /wheels
|
||||
|
||||
# ── Stage 2: Runtime ──────────────────────────────────────────────────────────
|
||||
FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}
|
||||
|
||||
ENV CTRANSLATE2_ROOT=/opt/ctranslate2
|
||||
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CTRANSLATE2_ROOT}/lib"
|
||||
# ROCm HIP allocator settings for stable inference
|
||||
ENV CT2_CUDA_ALLOCATOR=cub_caching
|
||||
ENV CT2_CUDA_CACHING_ALLOCATOR_CONFIG=4,3,12,419430400
|
||||
|
||||
COPY --from=builder /opt/ctranslate2 /opt/ctranslate2
|
||||
COPY --from=builder /wheels /wheels
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 python3-pip ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& pip install --no-cache-dir /wheels/*.whl \
|
||||
&& pip install --no-cache-dir \
|
||||
"faster-whisper>=1.2.1,<2" \
|
||||
"wyoming-faster-whisper>=3.1.1"
|
||||
|
||||
RUN mkdir /data
|
||||
VOLUME /data
|
||||
EXPOSE 10300
|
||||
|
||||
ENTRYPOINT ["python3", "-m", "wyoming_faster_whisper"]
|
||||
CMD ["--uri", "tcp://0.0.0.0:10300", "--data-dir", "/data", "--model", "Systran/faster-distil-whisper-small.en", "--device", "cuda", "--compute-type", "float16"]
|
||||
29
docker-compose.yml
Normal file
29
docker-compose.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
services:
|
||||
wyoming-faster-whisper:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
ROCM_VERSION: "6.2"
|
||||
GPU_ARCH: "gfx1030;gfx1031"
|
||||
image: rocm-faster-whisper:latest
|
||||
devices:
|
||||
- /dev/kfd
|
||||
- /dev/dri
|
||||
group_add:
|
||||
- video
|
||||
- render
|
||||
ports:
|
||||
- "10300:10300"
|
||||
volumes:
|
||||
- ./data:/data
|
||||
environment:
|
||||
# Override GFX version if ROCm doesn't natively support your exact chip.
|
||||
# gfx1031 (RX 6000-series variants) can be told to behave as gfx1030.
|
||||
- HSA_OVERRIDE_GFX_VERSION=10.3.0
|
||||
command: >
|
||||
--uri tcp://0.0.0.0:10300
|
||||
--data-dir /data
|
||||
--model Systran/faster-distil-whisper-small.en
|
||||
--device cuda
|
||||
--compute-type float16
|
||||
restart: unless-stopped
|
||||
1163
patches/ct2_4.0.0_rocm.patch
Executable file
1163
patches/ct2_4.0.0_rocm.patch
Executable file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user