@@ -158,4 +158,145 @@ gpu.module @test {
158158 %c = xegpu.dpas %a , %b : vector <32 x32 xf16 >, vector <32 x32 xf16 > -> vector <32 x32 xf32 >
159159 gpu.return %c : vector <32 x32 xf32 >
160160 }
161+
162+ //-----
163+
// Test: scattered tensor descriptor created from a constant offset vector.
// The input descriptor is 32xf32 with layout inst_data = [16]. The directives
// below expect two create_tdesc ops in the output, each taking
// vector<16xindex> offsets and producing a 16xf32 descriptor that no longer
// carries a layout attribute.
164+ // CHECK-LABEL: test_create_tdesc_vec
165+ // CHECK-SAME: [[arg0:%.+]]: ui64
166+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
167+ gpu.func @test_create_tdesc_vec (%src: ui64 ) -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>> {
// 32 offsets: the multiples of 8 from 0 through 248.
168+ %cst = arith.constant dense <[
169+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
170+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
171+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
172+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
173+ ]> : vector <32 xindex >
// The descriptor is returned so that the create_tdesc result has a use.
174+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
175+ gpu.return %tdesc : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
176+ }
177+
178+ //-----
179+
// Test: same descriptor shape as the constant-offset case (32xf32, layout
// inst_data = [16]), but the offsets are computed by ops instead of being a
// literal constant. The directives below still expect two create_tdesc ops
// with vector<16xindex> offsets and 16xf32 results.
180+ // CHECK-LABEL: test_create_tdesc_step
181+ // CHECK-SAME: [[arg0:%.+]]: ui64
182+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
183+ gpu.func @test_create_tdesc_step (%src: ui64 ) -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>> {
// Offsets = vector.step result multiplied element-wise by a splat of 8.
184+ %step = arith.constant dense <8 > : vector <32 xindex >
185+ %seq = vector.step : vector <32 xindex >
186+ %cst = arith.muli %seq , %step : vector <32 xindex >
187+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
188+ gpu.return %tdesc : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
189+ }
190+
191+ //-----
192+
// Test: masked load through a 32xf32 scattered descriptor with layout
// inst_data = [16]. The directives below expect two create_tdesc ops followed
// by two xegpu.load ops, each load taking a 16xf32 descriptor and a
// vector<16xi1> mask and yielding vector<16xf32>.
193+ // CHECK-LABEL: test_load
194+ // CHECK-SAME: [[arg0:%.+]]: ui64
195+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
196+ // CHECK-COUNT-2: xegpu.load {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
197+ gpu.func @test_load (%src: ui64 ) -> vector <32 xf32 > {
// 32 offsets: the multiples of 8 from 0 through 248.
198+ %cst = arith.constant dense <[
199+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
200+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
201+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
202+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
203+ ]> : vector <32 xindex >
204+
// 32-lane mask built from the bound 17.
// NOTE(review): 17 presumably places the mask boundary inside the second
// 16-lane half so the two halves receive different masks - confirm.
205+ %c17 = arith.constant 17 : index
206+ %mask = vector.create_mask %c17: vector <32 xi1 >
207+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
208+ %ld = xegpu.load %tdesc , %mask: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 > -> vector <32 xf32 >
209+
// The loaded vector is returned so that the load result has a use.
210+ gpu.return %ld : vector <32 xf32 >
211+ }
212+
213+ //-----
214+
// Test: prefetch through a 32xf32 scattered descriptor with layout
// inst_data = [16]. The directives below expect two create_tdesc ops followed
// by two xegpu.prefetch ops, each on a 16xf32 descriptor.
215+ // CHECK-LABEL: test_prefetch
216+ // CHECK-SAME: [[arg0:%.+]]: ui64
217+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
218+ // CHECK-COUNT-2: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
219+ gpu.func @test_prefetch (%src: ui64 ) {
220+
// 32 offsets: the multiples of 8 from 0 through 248.
221+ %cst = arith.constant dense <[
222+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
223+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
224+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
225+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
226+ ]> : vector <32 xindex >
227+
228+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
229+
// The prefetch produces no result; the function returns nothing.
230+ xegpu.prefetch %tdesc: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
231+ gpu.return
232+ }
233+
234+ //-----
235+
// Test: masked store through a 32xf32 scattered descriptor with layout
// inst_data = [16]. The directives below expect two create_tdesc ops followed
// by two xegpu.store ops, each storing vector<16xf32> through a 16xf32
// descriptor with a vector<16xi1> mask.
236+ // CHECK-LABEL: test_store
237+ // CHECK-SAME: [[arg0:%.+]]: ui64
238+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
239+ // CHECK-COUNT-2: xegpu.store {{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
240+ gpu.func @test_store (%src: ui64 ) {
// 32 offsets: the multiples of 8 from 0 through 248.
241+ %cst = arith.constant dense <[
242+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
243+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
244+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
245+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
246+ ]> : vector <32 xindex >
247+
// 32-lane mask built from the bound 17.
// NOTE(review): 17 presumably places the mask boundary inside the second
// 16-lane half so the two halves receive different masks - confirm.
248+ %c17 = arith.constant 17 : index
249+ %mask = vector.create_mask %c17: vector <32 xi1 >
250+
// Value to store: a 32-element splat of 1023.0.
251+ %st_vec = arith.constant dense <1023.0 >: vector <32 xf32 >
252+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
253+ xegpu.store %st_vec , %tdesc , %mask: vector <32 xf32 >, !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 >
254+
255+ gpu.return
256+ }
257+
258+ //-----
259+
// Test: prefetch, update_offset, masked load and masked store combined on one
// 32xf32 scattered descriptor with layout inst_data = [16]. The directives
// below expect, in this order, two each of create_tdesc, prefetch,
// update_offset, load and store, all on 16-element types.
260+ // CHECK-LABEL: test_prefetch_load_store_update
261+ // CHECK-SAME: [[arg0:%.+]]: ui64
262+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
263+ // CHECK-COUNT-2: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
264+ // CHECK-COUNT-2: xegpu.update_offset {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xindex>
265+ // CHECK-COUNT-2: xegpu.load {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
266+ // CHECK-COUNT-2: xegpu.store {{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
267+
268+ gpu.func @test_prefetch_load_store_update (%src: ui64 ) {
269+
// 32 offsets: the multiples of 8 from 0 through 248.
270+ %cst = arith.constant dense <[
271+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
272+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
273+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
274+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
275+ ]> : vector <32 xindex >
276+
277+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
278+ xegpu.prefetch %tdesc: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
279+
// Per-lane offset deltas: lanes 0-14 are 32 and lane 15 is 64; lanes 16-30
// are 128 and lane 31 is 256.
// NOTE(review): the differing values per 16-lane half presumably make the two
// halves of the delta vector distinguishable - confirm.
280+ %delta = arith.constant dense <[
281+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
282+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 64 ,
283+ 128 , 128 , 128 , 128 , 128 , 128 , 128 , 128 ,
284+ 128 , 128 , 128 , 128 , 128 , 128 , 128 , 256
285+ ]> : vector <32 xindex >
286+ %new_tdesc = xegpu.update_offset %tdesc , %delta
287+ : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xindex >
288+
// 32-lane mask built from the bound 17; shared by the load and the store.
289+ %c17 = arith.constant 17 : index
290+ %mask = vector.create_mask %c17: vector <32 xi1 >
291+
// The load reads through the updated descriptor (%new_tdesc).
292+ %ld_vec = xegpu.load %new_tdesc , %mask: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 > -> vector <32 xf32 >
293+
// The stored value is the loaded vector added to itself. Note that the store
// goes through the original %tdesc, not %new_tdesc.
294+ %st_vec = arith.addf %ld_vec , %ld_vec : vector <32 xf32 >
295+ xegpu.store %st_vec , %tdesc , %mask:
296+ vector <32 xf32 >,
297+ !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>,
298+ vector <32 xi1 >
299+
300+ gpu.return
301+ }
161302}
0 commit comments