Skip to content

Commit 9681fbd

Browse files
authored
[CI] Add unittest workflow for PaddlePaddle (#1)
1 parent 76cf151 commit 9681fbd

File tree

3 files changed

+191
-2
lines changed

3 files changed

+191
-2
lines changed

.github/workflows/ci-paddle.yml

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
name: CI Paddle
# Runs the Paddle test suite (tests_paddle/) inside a CUDA container started
# on a runner from the H20 runner group.
on:
  push:
    branches: [paddle]
    tags: ["v*"]
  pull_request:
  merge_group:
  workflow_dispatch:

permissions:
  contents: read

# A newer run for the same workflow + ref cancels the one still in progress.
concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true

jobs:
  test:
    name: Test
    runs-on:
      group: H20
    timeout-minutes: 30
    env:
      # Shared by every step so they all address the same container.
      container_name: tilelang-paddle-test-${{ github.run_id }}
    steps:
      - name: Check docker image and run container
        # These variables are forwarded into the container by name through the
        # value-less `-e VAR` flags on `docker run` below.
        env:
          FLAGS_fraction_of_gpu_memory_to_use: 0.15
          CTEST_PARALLEL_LEVEL: 2
          WITH_GPU: "ON"
          CUDA_ARCH_NAME: Hopper
          WITH_AVX: "ON"
          PY_VERSION: "3.10"
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          no_proxy: "bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
        run: |
          docker_image=ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test
          docker run -d -t --gpus all --name ${{ env.container_name }} \
            -v "/dev/shm:/dev/shm" \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/workspace \
            -e FLAGS_fraction_of_gpu_memory_to_use \
            -e CTEST_PARALLEL_LEVEL \
            -e WITH_GPU \
            -e CUDA_ARCH_NAME \
            -e WITH_AVX \
            -e PY_VERSION \
            -e GITHUB_TOKEN \
            -e no_proxy \
            -w /workspace \
            --network host \
            ${docker_image}

      - name: Checkout repository
        # The token is read from the container environment (GITHUB_TOKEN was
        # passed with `-e GITHUB_TOKEN` at `docker run`) instead of being
        # templated into the script text. The outer single quotes keep the
        # runner's shell from expanding it; the bash inside the container does.
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          set -e
          source ${{ github.workspace }}/../../../proxy
          git config --global --add safe.directory "*"
          # Clean workspace
          find . -maxdepth 1 ! -name "." -exec rm -rf {} +
          # Checkout
          git init
          git remote add origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${{ github.repository }}"
          git fetch origin ${{ github.ref }} --depth=1
          git checkout FETCH_HEAD
          git submodule update --init --recursive
          '

      - name: Install dependencies
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          set -e
          source ${{ github.workspace }}/../../../proxy

          # Install uv
          curl -LsSf https://astral.sh/uv/install.sh | sh
          source $HOME/.local/bin/env

          # Create and activate virtual environment
          uv venv .venv --seed
          source .venv/bin/activate

          # Install paddle
          uv pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu129/

          # Install project and minimal test runner
          uv pip install pytest
          uv pip install -e .
          '

      - name: Run tests
        # Reuses the .venv created by the previous step in /workspace.
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          set -e
          source .venv/bin/activate
          pytest tests_paddle/
          '

      - name: Terminate and delete the container
        # Runs even when earlier steps fail; `set +e` keeps a failed stop from
        # preventing the rm.
        if: always()
        run: |
          set +e
          docker stop ${{ env.container_name }}
          docker rm ${{ env.container_name }}

.github/workflows/dist-paddle.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
name: Dist
1+
name: Dist Paddle
22
on:
33
push:
4+
branches: [paddle]
45
tags: ["v*"]
6+
pull_request:
7+
merge_group:
58
workflow_dispatch:
69

710
permissions:
@@ -28,7 +31,7 @@ jobs:
2831
# Otherwise, the version of the SDist has a git hash suffix (e.g., 0.1.0+gitabcdef12),
2932
# but the package built from the SDist has no way to get the git hash (it is not a git repo),
3033
# leading to inconsistent versions between SDist and built packages (+gitabcdef12 vs. +gitunknown).
31-
NO_VERSION_LABEL: 'ON'
34+
NO_VERSION_LABEL: "ON"
3235

3336
steps:
3437
- name: Checkout repository

tests_paddle/test_quick_start.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import numpy as np
2+
import paddle
3+
4+
paddle.compat.enable_torch_proxy(scope={"tilelang"})
5+
6+
import tilelang
7+
import tilelang.language as T
8+
9+
10+
# @tilelang.jit(target="cuda")
11+
# target currently can be "cuda" or "hip" or "cpu".
12+
# if not specified, it will be inferred from the input tensors during compile time
13+
@tilelang.jit
def matmul(M, N, K, block_M, block_N, block_K, dtype="float16", accum_dtype="float"):
    # Build a tiled kernel that computes C = relu(A @ B).
    #
    # M, N, K:                   problem sizes; A is (M, K), B is (K, N), C is (M, N).
    # block_M, block_N, block_K: tile extents used for the shared tiles and the accumulator.
    # dtype:                     element type of A, B, C and of the shared tiles.
    # accum_dtype:               element type of the local accumulator C_local.
    #
    # Returns the inner prim_func. How @tilelang.jit turns it into a callable is
    # not visible in this file; the test below calls the result as kernel(a, b, c).
    @T.prim_func
    def matmul_relu_kernel(
        A: T.Tensor((M, K), dtype),
        B: T.Tensor((K, N), dtype),
        C: T.Tensor((M, N), dtype),
    ):
        # Initialize Kernel Context
        # One (bx, by) pair per output tile: bx walks tiles along N, by along M.
        with T.Kernel(T.ceildiv(N, block_N), T.ceildiv(M, block_M), threads=128) as (bx, by):
            A_shared = T.alloc_shared((block_M, block_K), dtype)
            B_shared = T.alloc_shared((block_K, block_N), dtype)
            C_local = T.alloc_fragment((block_M, block_N), accum_dtype)

            # Enable rasterization for better L2 cache locality (Optional)
            # T.use_swizzle(panel_size=10, enable=True)

            # Clear local accumulation
            T.clear(C_local)

            # Walk the K dimension one block_K-wide slab at a time.
            for ko in T.Pipelined(T.ceildiv(K, block_K), num_stages=3):
                # Copy tile of A
                # This is a sugar syntax for parallelized copy
                T.copy(A[by * block_M, ko * block_K], A_shared)

                # Copy tile of B
                T.copy(B[ko * block_K, bx * block_N], B_shared)

                # Perform a tile-level GEMM on the shared buffers
                # Currently we dispatch to the cute/hip on Nvidia/AMD GPUs
                T.gemm(A_shared, B_shared, C_local)

            # relu: clamp every accumulator element at zero before write-back
            for i, j in T.Parallel(block_M, block_N):
                C_local[i, j] = T.max(C_local[i, j], 0)

            # Copy result back to global memory
            T.copy(C_local, C[by * block_M, bx * block_N])

    return matmul_relu_kernel
53+
54+
55+
def test_quick_start():
    """Check the tiled matmul+ReLU kernel against a Paddle reference.

    Compiles ``matmul`` for a 1024x1024x1024 problem, runs it on random
    float16 CUDA tensors and compares the output with
    ``relu(a @ b)`` computed by Paddle, within ``rtol = atol = 1e-2``.
    """
    M = 1024  # M = T.dynamic("m") if you want to use dynamic shape
    N = 1024
    K = 1024
    block_M = 128
    block_N = 128
    block_K = 32

    # Define the kernel (matmul) and compile/lower it into an executable module
    matmul_relu_kernel = matmul(M, N, K, block_M, block_N, block_K)

    # Create random Paddle input tensors on the GPU; `c` receives the result.
    a = paddle.randn(M, K, device="cuda", dtype=paddle.float16)
    b = paddle.randn(K, N, device="cuda", dtype=paddle.float16)
    c = paddle.empty(M, N, device="cuda", dtype=paddle.float16)

    # Run the compiled kernel; it writes its output into `c`.
    matmul_relu_kernel(a, b, c)

    # Reference result computed with Paddle
    ref_c = paddle.nn.functional.relu(a @ b)

    # Validate correctness
    np.testing.assert_allclose(c.numpy(), ref_c.numpy(), rtol=1e-2, atol=1e-2)

0 commit comments

Comments
 (0)