diff --git a/tools/aie-chess-simulation/mixed_bfp16_bf16/Makefile b/tools/aie-chess-simulation/mixed_bfp16_bf16/Makefile
new file mode 100644
index 00000000000..532b5b1ab49
--- /dev/null
+++ b/tools/aie-chess-simulation/mixed_bfp16_bf16/Makefile
@@ -0,0 +1,15 @@
+# Builds test.cc with xchesscc for aie2p and runs the result through
+# xca_udm_dbg using sim.tcl. AIETOOLS_ROOT supplies the include and library
+# paths used below.
+all: build sim
+.PHONY : all build sim clean
+
+# The last include path used to be written `$../../../..`. make reads `$.` as
+# a reference to the (empty) variable named `.`, so that text expanded to
+# `./../../..`; the same directory is now spelled out directly.
+build:
+	rm -rf output && mkdir output && xchesscc --aiearch aie2p -p me -C Release_LLVM -D__AIENGINE__ - -I ${AIETOOLS_ROOT}/include -I ${AIETOOLS_ROOT}/include/aie_api -P ${AIETOOLS_ROOT}/data/aie2p/lib -d -f -g +s +w work +o work -I. -I ../../.. test.cc
+
+sim:
+	xca_udm_dbg --aiearch aie2p -qf -T -P ${AIETOOLS_ROOT}/data/aie2p/lib -t "sim.tcl work/a.out"
+
+clean:
+	rm -rf work *txt *mem *.output output
+
+
+
+
diff --git a/tools/aie-chess-simulation/mixed_bfp16_bf16/aie_kernel_utils.h b/tools/aie-chess-simulation/mixed_bfp16_bf16/aie_kernel_utils.h
new file mode 100644
index 00000000000..7d1a4d2162b
--- /dev/null
+++ b/tools/aie-chess-simulation/mixed_bfp16_bf16/aie_kernel_utils.h
@@ -0,0 +1,79 @@
+/*
+ Copyright (C) 2014 - 2022 Xilinx, Inc. All rights reserved.
+ Copyright (C) 2022 - 2025 Advanced Micro Devices, Inc. All rights reserved.
+ SPDX-License-Identifier: MIT
+*/
+
+#ifndef _AIE_KERNEL_UTILS_
+#define _AIE_KERNEL_UTILS_
+
+// Loop-annotation macros with one spelling for every compiler:
+//   - when __chess__ is defined they expand to chess attributes/keywords,
+//   - when __AIECC__ is defined they expand to `clang loop` pragmas (_Pragma),
+//   - otherwise they expand to nothing.
+// A hint that has no counterpart in a given branch is defined empty there.
+//
+// AIE_LOOP_RANGE(min[, max]) emits the minimum iteration count and, only when
+// a second argument is supplied (__VA_OPT__), the maximum iteration count.
+
+#if defined(__chess__)
+#define AIE_LOOP_UNROLL(x) [[chess::unroll_loop(x)]]
+#define AIE_LOOP_UNROLL_FULL [[chess::unroll_loop()]]
+#define AIE_LOOP_NO_UNROLL [[chess::no_unroll]]
+#define AIE_LOOP_MIN_ITERATION_COUNT(x) [[chess::min_loop_count(x)]]
+#define AIE_LOOP_MAX_ITERATION_COUNT(x) [[chess::max_loop_count(x)]]
+#define AIE_LOOP_RANGE(a, ...)                                                 \
+  [[chess::min_loop_count(a)]] __VA_OPT__(                                     \
+      [[chess::max_loop_count(__VA_ARGS__)]])
+#define AIE_PREPARE_FOR_PIPELINING [[chess::prepare_for_pipelining]]
+#define AIE_NO_PREPARE_FOR_PIPELINING [[chess::no_prepare_for_pipelining]]
+#define AIE_MODULO_SCHEDULING_BUDGET_RATIO(x)                                  \
+  [[chess::modulo_scheduling_budget_ratio(x)]]
+#define AIE_KEEP_SW_LOOP [[chess::keep_sw_loop]]
+#define AIE_PEEL_PIPELINED_LOOP(x) [[chess::peel_pipelined_loop(x)]]
+#define AIE_KEEP_FREE_FOR_PIPELINING(x) [[chess::keep_free_for_pipelining(x)]]
+#define AIE_ALLOCATE(x) [[chess::allocate(x)]]
+#define AIE_NO_HW_LOOP [[chess::no_hw_loop]]
+#define AIE_TRY_INITIATION_INTERVAL(x)
+#define AIE_PREPARE_FOR_POSTPIPELINING
+#define AIE_LOOP_FLATTEN chess_flatten_loop
+
+#elif defined(__AIECC__)
+// Turns the macro argument into the string literal that _Pragma requires.
+#ifndef __STRINGIFY
+#define __STRINGIFY(a) #a
+#endif
+#define AIE_LOOP_UNROLL(x) _Pragma(__STRINGIFY(clang loop unroll_count(x)))
+#define AIE_LOOP_UNROLL_FULL _Pragma("clang loop unroll(full)")
+#define AIE_LOOP_NO_UNROLL _Pragma("clang loop unroll(disable)")
+#define AIE_LOOP_MIN_ITERATION_COUNT(x)                                        \
+  _Pragma(__STRINGIFY(clang loop min_iteration_count(x)))
+#define AIE_LOOP_MAX_ITERATION_COUNT(x)                                        \
+  _Pragma(__STRINGIFY(clang loop max_iteration_count(x)))
+#define AIE_LOOP_RANGE(a, ...)                                                 \
+  AIE_LOOP_MIN_ITERATION_COUNT(a)                                              \
+  __VA_OPT__(AIE_LOOP_MAX_ITERATION_COUNT(__VA_ARGS__))
+#define AIE_PREPARE_FOR_PIPELINING
+#define AIE_NO_PREPARE_FOR_PIPELINING
+#define AIE_MODULO_SCHEDULING_BUDGET_RATIO(x)
+#define AIE_KEEP_SW_LOOP
+#define AIE_PEEL_PIPELINED_LOOP(x)
+#define AIE_KEEP_FREE_FOR_PIPELINING(x)
+#define AIE_ALLOCATE(x)
+#define AIE_NO_HW_LOOP
+#define AIE_TRY_INITIATION_INTERVAL(x)                                         \
+  _Pragma(__STRINGIFY(clang loop pipeline_initiation_interval(x)))
+#define AIE_PREPARE_FOR_POSTPIPELINING _Pragma("clang loop pipeline(disable)")
+#define AIE_LOOP_FLATTEN
+
+#else
+// Neither compiler detected: every hint is a no-op.
+#define AIE_LOOP_UNROLL(x)
+#define AIE_LOOP_UNROLL_FULL
+#define AIE_LOOP_NO_UNROLL
+#define AIE_LOOP_MIN_ITERATION_COUNT(x)
+#define AIE_LOOP_MAX_ITERATION_COUNT(x)
+#define AIE_LOOP_RANGE(a, ...)
+#define AIE_PREPARE_FOR_PIPELINING
+#define AIE_NO_PREPARE_FOR_PIPELINING
+#define AIE_MODULO_SCHEDULING_BUDGET_RATIO(x)
+#define AIE_KEEP_SW_LOOP
+#define AIE_PEEL_PIPELINED_LOOP(x)
+#define AIE_KEEP_FREE_FOR_PIPELINING(x)
+#define AIE_ALLOCATE(x)
+#define AIE_NO_HW_LOOP
+#define AIE_TRY_INITIATION_INTERVAL(x)
+#define AIE_PREPARE_FOR_POSTPIPELINING
+#define AIE_LOOP_FLATTEN
+#endif
+
+#endif
\ No newline at end of file
diff --git a/tools/aie-chess-simulation/mixed_bfp16_bf16/helper.h b/tools/aie-chess-simulation/mixed_bfp16_bf16/helper.h
new file mode 100644
index 00000000000..ba137dfad79
--- /dev/null
+++ b/tools/aie-chess-simulation/mixed_bfp16_bf16/helper.h
@@ -0,0 +1,175 @@
+//===- helper.h -------------------------------------------------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (C) 2025, Advanced Micro Devices, Inc.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "io_helpers.h"
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "aie_api/aie.hpp"
+
+// Quantizes an array of floats into a block floating-point byte stream.
+//
+// The input is consumed in consecutive groups of `block` values (the last
+// group may be shorter). For every group the output holds:
+//   1. one byte with the shared exponent, i.e. the largest biased 8-bit
+//      exponent found in the group;
+//   2. one byte per value: the value's mantissa (implicit bit made explicit)
+//      as an 8-bit two's complement integer, arithmetically shifted right by
+//      the difference between the shared exponent and the value's exponent.
+//
+// block    - number of values that share one exponent (test.cc passes 8)
+// size     - number of floats in `array`
+// array    - input values
+// rounding - accepted for interface compatibility but not used: the mantissa
+//            is always truncated
+//
+// Returns size + ceil(size / block) bytes, or an empty vector when `block` or
+// `size` is not positive or `array` is null.
+//
+// Inf/NaN inputs are not encoded: their byte keeps the zero it was
+// initialized with, while their exponent (255) still takes part in the
+// shared-exponent search.
+inline std::vector<uint8_t> floatToBfp16(int block, int size, float *array,
+                                         int rounding = 0) {
+  (void)rounding;
+
+  if (block <= 0 || size <= 0 || array == nullptr)
+    return {};
+
+  const int mbits = 7; // magnitude bits kept per value
+
+  // One exponent byte per (possibly partial) block plus one byte per value.
+  // For block == 8 and size a multiple of 8 this equals size * 1.125.
+  const int numBlocks = size / block + (size % block != 0 ? 1 : 0);
+  std::vector<uint8_t> res(static_cast<size_t>(size) +
+                           static_cast<size_t>(numBlocks));
+
+  // Index of the current block's exponent byte. Value bytes are addressed
+  // relative to it, so a skipped value can never shift the ones after it.
+  size_t expIndex = 0;
+
+  for (int start = 0; start < size;) {
+    // decide on the block (starting and ending point)
+    const int end = (size - start > block) ? start + block : size;
+
+    // Find max exp
+    // NOTE(review): the float bits are read through an integer pointer, as in
+    // the previous version; std::memcpy would be the strictly conforming form.
+    unsigned int maxExp = 0;
+    for (int i = start; i < end; i++) {
+      const unsigned int bits = *reinterpret_cast<unsigned int *>(array + i);
+      const unsigned int exp = (bits >> 23) & 0x000000FF; // drop mantissa, sign
+      maxExp = maxExp < exp ? exp : maxExp;
+    }
+    res[expIndex] = static_cast<uint8_t>(maxExp);
+
+    // Quantize each number
+    for (int i = start; i < end; i++) {
+      const unsigned int bits = *reinterpret_cast<unsigned int *>(array + i);
+
+      const unsigned int sign = bits & 0x80000000;        // Sign
+      const unsigned int exp = (bits >> 23) & 0x000000FF; // 8-bit exponent
+      unsigned int mantissa = bits & 0x007FFFFF;          // 23-bit mantissa
+      if (exp)
+        mantissa |= 0x00800000; // add the implicit bit for normal values
+
+      if (exp >= 255)
+        continue; // Infinity or NaN: slot stays zero
+
+      // The rounding mode for the mantissa in AIE2p is always truncation
+      // Each scalar value is stored in two's complement representation
+      mantissa = sign ? ~mantissa + 1 : mantissa;
+      // At least erase 23 - mbits + 1 (+1 is for making the implicit bit
+      // explicit)
+      uint8_t valueInt8 = static_cast<uint8_t>(mantissa >> (23 - mbits + 1));
+
+      // Note that shifting by 32 bits or more is undefined behavior in C++
+      const unsigned int shift = maxExp - exp;
+      if (shift >= 32) {
+        valueInt8 = sign ? 0xff : 0x00;
+      } else {
+        // Perform an arithmetic right shift
+        // Again, the rounding mode is truncation for AIE2p
+        valueInt8 =
+            static_cast<uint8_t>(static_cast<int8_t>(valueInt8) >> shift);
+      }
+
+      res[expIndex + 1 + static_cast<size_t>(i - start)] = valueInt8;
+    }
+
+    expIndex += 1 + static_cast<size_t>(end - start);
+    start = end;
+  }
+
+  return res;
+}
+
+
+// Writes a rows x cols row-major float matrix to `filename`: a "(rows, cols)"
+// header line followed by one space-separated line per row.
+// `inline` because this definition lives in a header.
+inline void print_matrix_float(const char* filename, float* data, int rows, int cols) {
+    FILE* fp = open_file(filename, "w+");
+    fprintf(fp, "(%d, %d)\n", rows, cols);
+    for (int i = 0; i < rows; ++i) {
+        for (int j = 0; j < cols; ++j) {
+            fprintf(fp, "%f", (float)data[i * cols + j]);
+            if (j < cols - 1) fprintf(fp, " ");
+        }
+        fprintf(fp, "\n");
+    }
+    fclose(fp);
+}
+
+// Same output format as print_matrix_float, for bfloat16 data; each element
+// is converted to float before printing.
+inline void print_matrix_bfloat16(const char* filename, bfloat16* data, int rows, int cols) {
+    FILE* fp = open_file(filename, "w+");
+    fprintf(fp, "(%d, %d)\n", rows, cols);
+    for (int i = 0; i < rows; ++i) {
+        for (int j = 0; j < cols; ++j) {
+            fprintf(fp, "%f", (float)data[i * cols + j]);
+            if (j < cols - 1) fprintf(fp, " ");
+        }
+        fprintf(fp, "\n");
+    }
+    fclose(fp);
+}
+
+// Golden result calculation: naive matrix multiplication (float)
+// C[M x N] = A[M x K] * B[K x N], all matrices row-major.
+// Prints the first 8 operand pairs of C[0][0] and the first 8 sums of row 0
+// as DEBUG lines on stdout.
+inline void calc_golden_result(const float* A, const float* B, float* C, int M, int K, int N) {
+    for (int i = 0; i < M; ++i) {
+        for (int j = 0; j < N; ++j) {
+            float sum = 0.0f;
+            for (int k = 0; k < K; ++k) {
+                const float a_val = A[i * K + k];
+                const float b_val = B[k * N + j];
+                if (i == 0 && j == 0 && k < 8) {
+                    printf("DEBUG: A[0][%d]=%f, B[%d][0]=%f\n", k, a_val, k, b_val);
+                }
+                sum += a_val * b_val;
+            }
+            if (i == 0 && j < 8) {
+                printf("DEBUG: gold[%d] sum = %f\n", j, sum);
+            }
+            C[i * N + j] = sum;
+        }
+    }
+}
+
+// Layout transpose function: reorganize 8x8 matrix from row-major to column-major layout
+// Input: 8x8 float array, row-major
+// Output: 8x8 array in column-major layout
+// `rows` and `cols` are accepted but ignored: exactly one 8x8 block (64
+// elements) is read and written regardless of their values.
+inline void layout_transpose_8x8block(float* input, float* output, int rows, int cols) {
+    (void)rows;
+    (void)cols;
+
+    const int kDim = 8;
+    int output_idx = 0;
+
+    // Walk the block column by column, reading from the row-major input.
+    for (int col = 0; col < kDim; col++) {
+        for (int row = 0; row < kDim; row++) {
+            output[output_idx++] = input[row * kDim + col];
+        }
+    }
+}
+
+
diff --git a/tools/aie-chess-simulation/mixed_bfp16_bf16/io_helpers.h b/tools/aie-chess-simulation/mixed_bfp16_bf16/io_helpers.h
new file mode 100644
index 00000000000..7e0a4a02e38
--- /dev/null
+++ b/tools/aie-chess-simulation/mixed_bfp16_bf16/io_helpers.h
@@ -0,0 +1,367 @@
+/* (c) Copyright 2014 - 2019 Xilinx, Inc. All rights reserved.
+
+   This file contains confidential and proprietary information
+   of Xilinx, Inc. and is protected under U.S. and
+   international copyright and other intellectual property
+   laws.
+
+   DISCLAIMER
+   This disclaimer is not a license and does not grant any
+   rights to the materials distributed herewith.
Except as + otherwise provided in a valid license issued to you by + Xilinx, and to the maximum extent permitted by applicable + law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND + WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES + AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING + BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- + INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and + (2) Xilinx shall not be liable (whether in contract or tort, + including negligence, or under any other theory of + liability) for any loss or damage of any kind or nature + related to, arising under or in connection with these + materials, including for any direct, or any indirect, + special, incidental, or consequential loss or damage + (including loss of data, profits, goodwill, or any type of + loss or damage suffered as a result of any action brought + by a third party) even if such damage or loss was + reasonably foreseeable or Xilinx had been advised of the + possibility of the same. + + CRITICAL APPLICATIONS + Xilinx products are not designed or intended to be fail- + safe, or for use in any application requiring fail-safe + performance, such as life-support or safety devices or + systems, Class III medical devices, nuclear facilities, + applications related to the deployment of airbags, or any + other applications that could lead to death, personal + injury, or severe property or environmental damage + (individually and collectively, "Critical + Applications"). Customer assumes the sole risk and + liability of any use of Xilinx products in Critical + Applications, subject only to applicable laws and + regulations governing limitations on product liability. + + THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS + PART OF THIS FILE AT ALL TIMES. 
*/
+
+#pragma once
+
+#ifndef __AIE_API_TESTS_IO_HELPERS_HPP__
+#define __AIE_API_TESTS_IO_HELPERS_HPP__
+
+#include
+#include
+#include
+
+#include "aie_api/aie.hpp"
+
+// Opens `filename` with fopen(filename, mode). On failure prints an error to
+// stderr and terminates the program with exit(1), so the returned pointer is
+// never NULL.
+[[maybe_unused]] static FILE *open_file(const char* filename, const char *mode)
+{
+    FILE *fp = fopen(filename,mode);
+
+    if (fp == NULL) {
+        fprintf(stderr, "ERROR: Cannot open file '%s'.\n",filename);
+        exit(1);
+    }
+
+    return fp;
+}
+
+// write_file overloads: open `filename` in "w+" mode, print `num` elements of
+// `output` one per line, and close the file. The overloads differ only in
+// element type and printf format.
+[[maybe_unused]] static void write_file(const int8 *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%d\n", output[i]);
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void write_file(const uint8 *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%u\n", (unsigned)output[i]);
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void write_file(const int16 *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%d\n", output[i]);
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void write_file(const uint16 *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%u\n", output[i]);
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void write_file(const int32 *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%d\n", output[i]);
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void write_file(const uint32 *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%u\n", output[i]);
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void write_file(float *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%f\n", output[i]);
+
+    fclose(fp);
+}
+
+#if __AIE_ARCH__ >= 20
+// bfloat16 elements are converted to float before printing.
+[[maybe_unused]] static void write_file(bfloat16 *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%f\n", (float)(output[i]));
+
+    fclose(fp);
+}
+#endif
+
+// Float writer with a complex mode: when `cmplx` is true, `output` is read as
+// num/2 consecutive pairs and each pair is printed on one line; otherwise
+// `num` values are printed one per line.
+[[maybe_unused]] static void write_file(const float *output, unsigned num, bool cmplx, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+    if (cmplx) {
+        for (int i = 0; i < num/2; i++)
+            fprintf(fp, "%9.6g %9.6g\n", output[2*i], output[2*i+1]);
+    }
+    else {
+        for (int i = 0; i < num; i++)
+            fprintf(fp, "%f\n", output[i]);
+    }
+
+    fclose(fp);
+}
+
+#if __AIE_ARCH__ == 10 || __AIE_API_COMPLEX_FP32_EMULATION__
+// Prints `num` cfloat elements, two floats per line, by viewing the array as
+// 2 * num floats.
+[[maybe_unused]] static void write_file(const cfloat *output, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename,"w+");
+    const float *tmp = (const float*)output;
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%9.6g %9.6g\n", tmp[2*i], tmp[2*i+1]);
+
+    fclose(fp);
+}
+#endif
+
+// read_file overloads: open `filename` for reading, parse `num` values with
+// fscanf and store them through `dest`. A failed parse is caught only by
+// assert(), so the check disappears when NDEBUG is defined.
+[[maybe_unused]] static void read_file(int8 *dest, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename, "r");
+
+    for (int i = 0; i < num; ++i) {
+        int re;
+        int ret = fscanf(fp, "%d", &re);
+        if (ret != 1) fprintf(stderr, "failed: %d\n", i);
+        assert(ret == 1);
+
+        *dest++ = re;
+    }
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void read_file(uint8 *dest, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename, "r");
+
+    for (int i = 0; i < num; ++i) {
+        unsigned re;
+        int ret = fscanf(fp, "%u", &re);
+        if (ret != 1) fprintf(stderr, "failed: %d\n", i);
+        assert(ret == 1);
+
+        *dest++ = re;
+    }
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void read_file(int16 *dest, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename, "r");
+
+    for (int i = 0; i < num; ++i) {
+        int re;
+        int ret = fscanf(fp, "%d", &re);
+        assert(ret == 1);
+
+        *dest++ = re;
+    }
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void read_file(uint16 *dest, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename, "r");
+
+    for (int i = 0; i < num; ++i) {
+        unsigned re;
+        int ret = fscanf(fp, "%u", &re);
+        assert(ret == 1);
+
+        *dest++ = re;
+    }
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void read_file(int32 *dest, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename, "r");
+
+    for (int i = 0; i < num; ++i) {
+        int re;
+        int ret = fscanf(fp, "%d", &re);
+        assert(ret == 1);
+
+        *dest++ = re;
+    }
+
+    fclose(fp);
+}
+
+[[maybe_unused]] static void read_file(uint32 *dest, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename, "r");
+
+    for (int i = 0; i < num; ++i) {
+        unsigned re;
+        int ret = fscanf(fp, "%u", &re);
+        assert(ret == 1);
+
+        *dest++ = re;
+    }
+
+    fclose(fp);
+}
+
+// Function-pointer types for a 32-bit stream source (returns one word) and a
+// 32-bit stream sink (consumes one word).
+typedef int (*stream_32_in_t)();
+typedef void (*stream_32_out_t)(int);
+
+//read 32bit stream
+[[maybe_unused]] static void read_stream(int16 *dest, unsigned num, bool cplx, stream_32_in_t stream_in)
+{
+    int32 tmp;
+    if (cplx) {
+        for (int i=0; i>16)&0xffff);
+        }
+    }
+    else {
+        for (int i=0; i= 20
+// Reads `num` values as float and stores each one converted to bfloat16.
+[[maybe_unused]] static void read_file(bfloat16 *dest, unsigned num, const char* filename)
+{
+    FILE *fp = open_file(filename, "r");
+
+    for (int i = 0; i < num; ++i) {
+        float re;
+        int ret = fscanf(fp, "%f", &re);
+        assert(ret == 1);
+
+        *dest++ = (bfloat16)re;
+    }
+
+    fclose(fp);
+}
+#endif
+
+// Reads `num` cfloat elements by forwarding to the float reader with
+// 2 * num values.
+[[maybe_unused]] static void read_file(cfloat *dest, unsigned num, const char* filename)
+{
+    return read_file((float *)dest, num * 2, filename);
+}
+
+#endif // __AIE_API_TESTS_IO_HELPERS_HPP__
diff --git a/tools/aie-chess-simulation/mixed_bfp16_bf16/sim.tcl b/tools/aie-chess-simulation/mixed_bfp16_bf16/sim.tcl
new file mode 100644
index 00000000000..adad53661d5
--- /dev/null
+++ b/tools/aie-chess-simulation/mixed_bfp16_bf16/sim.tcl
@@ -0,0 +1,13 @@
+# Loads the program `file` into the ISS, taking the -nmlpath directory from
+# the me_DIR environment variable.
+proc my_load_program {file} {
+    set me_DIR $::env(me_DIR)
+    iss program load $file -nmlpath $me_DIR -do_not_set_entry_pc 1 -pm_check first -load_offsets {}
+}
+
+# Create the ISS, load the program named by the first script argument, run it
+# to completion and exit with the program's own exit code.
+iss::create %PROCESSORNAME% iss
+my_load_program [lindex $::iss::tcl_script_args 0]
+iss step -1
+set retcode [iss program query exit_code]
+puts -nonewline "@@ EXIT STATUS "
+puts $retcode
+exit $retcode
+
diff --git a/tools/aie-chess-simulation/mixed_bfp16_bf16/test.cc b/tools/aie-chess-simulation/mixed_bfp16_bf16/test.cc
new file mode 100755
index 00000000000..b0361fd351d
--- /dev/null
+++ b/tools/aie-chess-simulation/mixed_bfp16_bf16/test.cc
@@ -0,0 +1,84 @@
+#include "helper.h"
+#include "aie_kernel_utils.h"
+
+
+// Single 8x8x8 multiply-accumulate: loads 64 bfloat16 values of A, converts
+// them to a block vector through an accumulator, pops one block vector of B
+// from a stream over inB, runs mac_8x8_8x8T on a zero accumulator and stores
+// the result to outC. Intermediate values are dumped with chess_report.
+void single_mac_8x8x8(bfloat16 *__restrict inA,
+                      bfp16ebs8 *__restrict inB,
+                      bfloat16 *__restrict outC) {
+    aie::vector A_data_bf16 = aie::load_v<64>(inA);
+    aie::accum A_data_float;
+    A_data_float = A_data_bf16;
+    aie::block_vector A_data_bfp = A_data_float.to_vector();
+
+    aie::block_vector_input_buffer_stream pB_stream(inB);
+    aie::block_vector B_data = pB_stream.pop();
+    aie::accum acc_data = aie::zeros();
+
+    chess_report(A_data_bfp);
+    chess_report(B_data);
+    acc_data = mac_8x8_8x8T(A_data_bfp, B_data, acc_data);
+    chess_report(acc_data);
+    aie::vector C_data = acc_data.template to_vector();
+    chess_report(C_data);
+    aie::store_v(outC, C_data);
+}
+
+
+constexpr int M = 8; constexpr int K = 8; constexpr int N = 8;
+constexpr int m = 8; constexpr int k = 8; constexpr int n = 8;
+constexpr int r = 8; constexpr int s = 8; constexpr int t = 8;
+
+
+
+// Builds the A/B inputs, computes the float reference, runs the kernel once
+// and writes every matrix to output/*.txt.
+// NOTE(review): C is written to output/C.txt but is not compared against the
+// reference here, so the exit status does not reflect numerical correctness.
+int main()
+{
+
+    printf("test start ...\n");
+
+    // Compile-time element counts, so the aligned stack buffers below are
+    // ordinary fixed-size arrays rather than variable-length arrays.
+    constexpr int A_SIZE = M * K;
+    constexpr int B_SIZE = N * K;
+    constexpr int C_SIZE = M * N;
+
+    // Host-side float buffers; std::vector releases them on every exit path.
+    std::vector<float> A_float(A_SIZE);
+    std::vector<float> B_float(B_SIZE);
+    for (int i = 0; i < A_SIZE; i++) {
+        A_float[i] = i % 8;
+    }
+    for (int i = 0; i < B_SIZE; i++) {
+        B_float[i] = i % 8;
+    }
+
+    // Test layout transpose function
+    printf("Testing layout transpose...\n");
+    std::vector<float> B_transposed(B_SIZE);
+    layout_transpose_8x8block(B_float.data(), B_transposed.data(), N, K);
+
+    std::vector<float> Gold_float(C_SIZE);
+    calc_golden_result(A_float.data(), B_float.data(), Gold_float.data(), M, K, N);
+
+    print_matrix_float("output/A.txt", A_float.data(), M, K);
+    print_matrix_float("output/B.txt", B_float.data(), N, K);
+    print_matrix_float("output/B_transposed.txt", B_transposed.data(), N, K);
+    print_matrix_float("output/Gold.txt", Gold_float.data(), M, N);
+
+    alignas(aie::vector_decl_align) bfloat16 A_bfloat16[A_SIZE];
+    for (int i = 0; i < A_SIZE; i++) {
+        A_bfloat16[i] = static_cast<bfloat16>(A_float[i]);
+    }
+
+    // Quantize the transposed B in groups of 8 values per shared exponent.
+    // NOTE(review): unlike A and C this buffer only has the allocator's
+    // default alignment - confirm that is sufficient for the stream load.
+    auto B_bfp16ebs8 = floatToBfp16(8, B_SIZE, B_transposed.data(), 0);
+    alignas(aie::vector_decl_align) bfloat16 C_bfloat16[C_SIZE];
+    single_mac_8x8x8(A_bfloat16,
+                     reinterpret_cast<bfp16ebs8 *>(B_bfp16ebs8.data()),
+                     C_bfloat16);
+
+    print_matrix_bfloat16("output/C.txt", C_bfloat16, M, N);
+
+    printf("test done!\n");
+    return 0;
+}
diff --git a/tools/aie-chess-simulation/mixed_bfp16_bf16/test.prx b/tools/aie-chess-simulation/mixed_bfp16_bf16/test.prx
new file mode 100644
index 00000000000..2911f739a15
--- /dev/null
+++ b/tools/aie-chess-simulation/mixed_bfp16_bf16/test.prx
@@ -0,0 +1,8 @@
+
+
+
diff --git a/tools/aie-chess-simulation/simple_test/Makefile b/tools/aie-chess-simulation/simple_test/Makefile
new file mode 100644
index 00000000000..a84e72a85d1
--- /dev/null
+++ b/tools/aie-chess-simulation/simple_test/Makefile
@@ -0,0 +1,15 @@
+# Builds test.cc with xchesscc for aie2p and runs the result through
+# xca_udm_dbg using sim.tcl. AIETOOLS_ROOT supplies the include and library
+# paths used below.
+all: build sim
+.PHONY : all build sim clean
+
+# The last include path used to be written `$../../../..`. make reads `$.` as
+# a reference to the (empty) variable named `.`, so that text expanded to
+# `./../../..`; the same directory is now spelled out directly.
+build:
+	xchesscc --aiearch aie2p -p me -C Release_LLVM -D__AIENGINE__ - -I ${AIETOOLS_ROOT}/include -I ${AIETOOLS_ROOT}/include/aie_api -P ${AIETOOLS_ROOT}/data/aie2p/lib -d -f -g +s +w work +o work -I. -I ../../.. test.cc
+
+sim:
+	xca_udm_dbg --aiearch aie2p -qf -T -P ${AIETOOLS_ROOT}/data/aie2p/lib -t "sim.tcl work/a.out"
+
+clean:
+	rm -rf work *txt *mem *.output
+
+
+
+
diff --git a/tools/aie-chess-simulation/simple_test/sim.tcl b/tools/aie-chess-simulation/simple_test/sim.tcl
new file mode 100644
index 00000000000..adad53661d5
--- /dev/null
+++ b/tools/aie-chess-simulation/simple_test/sim.tcl
@@ -0,0 +1,13 @@
+# Loads the program `file` into the ISS, taking the -nmlpath directory from
+# the me_DIR environment variable.
+proc my_load_program {file} {
+    set me_DIR $::env(me_DIR)
+    iss program load $file -nmlpath $me_DIR -do_not_set_entry_pc 1 -pm_check first -load_offsets {}
+}
+
+# Create the ISS, load the program named by the first script argument, run it
+# to completion and exit with the program's own exit code.
+iss::create %PROCESSORNAME% iss
+my_load_program [lindex $::iss::tcl_script_args 0]
+iss step -1
+set retcode [iss program query exit_code]
+puts -nonewline "@@ EXIT STATUS "
+puts $retcode
+exit $retcode
+
diff --git a/tools/aie-chess-simulation/simple_test/test.cc b/tools/aie-chess-simulation/simple_test/test.cc
new file mode 100755
index 00000000000..871017df86b
--- /dev/null
+++ b/tools/aie-chess-simulation/simple_test/test.cc
@@ -0,0 +1,21 @@
+#include
+#include
+#include
+#include
+
+// Trivial kernel used as a toolchain smoke test: returns twice its argument.
+int __inline__ kernel(int a)
+{
+    return a*2;
+}
+
+
+
+// Runs the kernel once and reports a mismatch through the exit status, which
+// sim.tcl forwards as the simulator's exit code.
+int main()
+{
+    printf("test start ...\n");
+    const int a = 1;
+    const int res = kernel(a);
+    if (res != 2 * a) {
+        printf("test failed: kernel(%d) returned %d\n", a, res);
+        return 1;
+    }
+
+    printf("test done!\n");
+    return 0;
+}
diff --git a/tools/aie-chess-simulation/simple_test/test.prx b/tools/aie-chess-simulation/simple_test/test.prx
new file mode 100644
index 00000000000..2911f739a15
--- /dev/null
+++ b/tools/aie-chess-simulation/simple_test/test.prx
@@ -0,0 +1,8 @@
+
+
+