Skip to content

Commit

Permalink
Ensure aie partition width is sufficiently large (#1997)
Browse files Browse the repository at this point in the history
Co-authored-by: AndraBisca <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Dec 19, 2024
1 parent 6d5becc commit 4d613f9
Show file tree
Hide file tree
Showing 3 changed files with 253 additions and 1 deletion.
2 changes: 1 addition & 1 deletion python/compiler/aiecc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def emit_partition(mlir_module_str, kernel_id="0x901", start_columns=None):
module.operation,
lambda o: isinstance(o.operation.opview, aiedialect.TileOp),
)
min_col = min([t.col.value for t in tiles], default=0)
min_col = 0
max_col = max([t.col.value for t in tiles], default=0)
num_cols = max_col - min_col + 1
device = find_ops(
Expand Down
57 changes: 57 additions & 0 deletions test/npu-xrt/device_width/aie2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# device_width/aie2.py -*- Python -*-
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
#
# REQUIRES: ryzen_ai, valid_xchess_license
#
# RUN: %python %S/aie2.py > ./aie2.mlir
# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags
# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir
# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt

import numpy as np
import sys

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.extras.context import mlir_mod_ctx

# Number of int32 elements in each runtime-sequence buffer; also used as the
# innermost size of both npu_dma_memcpy_nd transfers below.
N = 4096
# Target device handed to the @device decorator.
dev = AIEDevice.npu1_2col
# Number of int32 elements in each object-fifo buffer (see line_ty below).
line_size = 1024


def my_passthrough():
    """Describe the passthrough design and print the resulting MLIR module.

    The design declares tiles (1, 0) and (1, 2), links an input object fifo
    straight to an output object fifo, and defines a runtime sequence that
    copies N int32 values from the first buffer argument into the design and
    from the design into the third buffer argument. The second buffer
    argument is declared but unused.
    """
    with mlir_mod_ctx() as ctx:

        @device(dev)
        def device_body():
            # Element types: one object-fifo buffer and the full host vector.
            line_ty = np.ndarray[(line_size,), np.dtype[np.int32]]
            vector_ty = np.ndarray[(N,), np.dtype[np.int32]]

            # Tile declarations
            shim_tile = tile(1, 0)
            compute_tile = tile(1, 2)

            # AIE-array data movement with object fifos: "in" is forwarded
            # directly to "out" through the link, no compute core involved.
            of_in = object_fifo("in", shim_tile, compute_tile, 2, line_ty)
            of_out = object_fifo("out", compute_tile, shim_tile, 2, line_ty)
            object_fifo_link(of_in, of_out)

            # To/from AIE-array data movement
            @runtime_sequence(vector_ty, vector_ty, vector_ty)
            def sequence(A, B, C):
                transfer_sizes = [1, 1, 1, N]
                npu_dma_memcpy_nd(
                    metadata=of_in,
                    bd_id=1,
                    mem=A,
                    sizes=transfer_sizes,
                    issue_token=True,
                )
                npu_dma_memcpy_nd(
                    metadata=of_out, bd_id=0, mem=C, sizes=transfer_sizes
                )
                dma_wait(of_in, of_out)

        print(ctx.module)


my_passthrough()
195 changes: 195 additions & 0 deletions test/npu-xrt/device_width/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
//===- test.cpp -------------------------------------------000---*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2023, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <boost/program_options.hpp>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "xrt/xrt_bo.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_kernel.h"

namespace po = boost::program_options;

/// Verify that the option `name` was supplied and names a file that can be
/// opened for reading.
///
/// @param vm_in parsed command-line options
/// @param name  option key whose value is a file path
/// @throws std::runtime_error if the option is absent or the file cannot be
///         opened
void check_arg_file_exists(po::variables_map &vm_in, std::string name) {
  // Guard: the option must have been supplied at all.
  if (vm_in.count(name) == 0)
    throw std::runtime_error("Error: no " + name + " file was provided\n");

  // Probe the path by opening it; the stream closes when it leaves scope.
  const std::string path = vm_in[name].as<std::string>();
  std::ifstream probe(path);
  if (probe)
    return;

  throw std::runtime_error("The " + name + " file " + path +
                           " does not exist.\n");
}

/// Read a text instruction file into a vector of 32-bit words.
///
/// Every line of the file is parsed as one hexadecimal number (the first
/// whitespace-delimited token on the line); the words are returned in file
/// order.
///
/// @param instr_path path of the instruction text file
/// @return the parsed instruction words
/// @throws std::runtime_error if the file cannot be opened, or if any line
///         does not start with a valid hexadecimal number
std::vector<uint32_t> load_instr_sequence(std::string instr_path) {
  std::ifstream instr_file(instr_path);
  // Fail loudly instead of silently returning an empty instruction stream
  // when the path is wrong or unreadable.
  if (!instr_file.is_open()) {
    throw std::runtime_error("Unable to open instruction file: " + instr_path +
                             "\n");
  }

  std::vector<uint32_t> instr_v;
  std::string line;
  while (std::getline(instr_file, line)) {
    std::istringstream iss(line);
    uint32_t word;
    if (!(iss >> std::hex >> word)) {
      throw std::runtime_error("Unable to parse instruction file\n");
    }
    instr_v.push_back(word);
  }
  return instr_v;
}

/// Host driver for the passthrough test.
///
/// Parses the command line, loads the xclbin and the instruction sequence,
/// fills the first input buffer with the values 1..N, runs the kernel, and
/// checks that the output buffer contains the same values.
///
/// @return 0 if every output element matches; 1 on a usage error, a missing
///         file or kernel, a kernel that did not complete, or a data mismatch
int main(int argc, const char *argv[]) {
  // Program arguments parsing
  po::options_description desc("Allowed options");
  desc.add_options()("help,h", "produce help message")(
      "xclbin,x", po::value<std::string>()->required(),
      "the input xclbin path")(
      "kernel,k", po::value<std::string>()->required(),
      "the kernel name in the XCLBIN (for instance PP_PRE_FD)")(
      "verbosity,v", po::value<int>()->default_value(0),
      "the verbosity of the output")(
      "instr,i", po::value<std::string>()->required(),
      "path of file containing userspace instructions to be sent to the LX6")(
      "length,l", po::value<int>()->default_value(4096),
      "the length of the transfer in int32_t");
  po::variables_map vm;

  try {
    po::store(po::parse_command_line(argc, argv, desc), vm);

    // Handle --help before notify(): notify() enforces the required options
    // and would otherwise reject a bare "--help" with a missing-option error.
    if (vm.count("help")) {
      std::cout << desc << std::endl;
      return 1;
    }

    po::notify(vm);
  } catch (const std::exception &ex) {
    std::cerr << ex.what() << "\n\n";
    std::cerr << "Usage:\n" << desc << std::endl;
    return 1;
  }

  // Both helpers report problems by throwing; turn that into a clean exit
  // rather than an unhandled-exception abort.
  std::vector<uint32_t> instr_v;
  try {
    check_arg_file_exists(vm, "xclbin");
    check_arg_file_exists(vm, "instr");
    instr_v = load_instr_sequence(vm["instr"].as<std::string>());
  } catch (const std::exception &ex) {
    std::cerr << ex.what() << std::endl;
    return 1;
  }

  const int verbosity = vm["verbosity"].as<int>();
  if (verbosity >= 1)
    std::cout << "Sequence instr count: " << instr_v.size() << std::endl;

  // N sizes every data buffer below, so it must be strictly positive.
  const int N = vm["length"].as<int>();
  if (N <= 0 || (N % 1024)) {
    std::cerr << "Length must be a positive multiple of 1024." << std::endl;
    return 1;
  }

  // Start the XRT test code
  // Get a device handle
  unsigned int device_index = 0;
  auto device = xrt::device(device_index);

  // Load the xclbin
  if (verbosity >= 1)
    std::cout << "Loading xclbin: " << vm["xclbin"].as<std::string>()
              << std::endl;
  auto xclbin = xrt::xclbin(vm["xclbin"].as<std::string>());

  if (verbosity >= 1)
    std::cout << "Kernel opcode: " << vm["kernel"].as<std::string>()
              << std::endl;
  std::string Node = vm["kernel"].as<std::string>();

  // Get the kernel from the xclbin: first kernel whose name starts with Node
  auto xkernels = xclbin.get_kernels();
  auto kernel_it = std::find_if(xkernels.begin(), xkernels.end(),
                                [Node](xrt::xclbin::kernel &k) {
                                  auto name = k.get_name();
                                  std::cout << "Name: " << name << std::endl;
                                  return name.rfind(Node, 0) == 0;
                                });
  // Dereferencing end() is undefined behaviour, so bail out explicitly when
  // no kernel matches.
  if (kernel_it == xkernels.end()) {
    std::cerr << "Kernel " << Node << " not found in xclbin." << std::endl;
    return 1;
  }
  auto xkernel = *kernel_it;
  auto kernelName = xkernel.get_name();

  if (verbosity >= 1)
    std::cout << "Registering xclbin: " << vm["xclbin"].as<std::string>()
              << "\n";

  device.register_xclbin(xclbin);

  // get a hardware context
  if (verbosity >= 1)
    std::cout << "Getting hardware context." << std::endl;
  xrt::hw_context context(device, xclbin.get_uuid());

  // get a kernel handle
  if (verbosity >= 1)
    std::cout << "Getting handle to kernel:" << kernelName << std::endl;
  auto kernel = xrt::kernel(context, kernelName);

  // The group ids below are the positions of the corresponding arguments in
  // the kernel call further down: (opcode, instr, instr count, A, B, out).
  const std::size_t instr_bytes = instr_v.size() * sizeof(uint32_t);
  const std::size_t data_bytes = static_cast<std::size_t>(N) * sizeof(int32_t);

  auto bo_instr = xrt::bo(device, instr_bytes, XCL_BO_FLAGS_CACHEABLE,
                          kernel.group_id(1));
  auto bo_inA =
      xrt::bo(device, data_bytes, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
  // bo_inB is only passed to the kernel; this program never writes or syncs it.
  auto bo_inB =
      xrt::bo(device, data_bytes, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
  auto bo_out =
      xrt::bo(device, data_bytes, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));

  if (verbosity >= 1)
    std::cout << "Writing data into buffer objects." << std::endl;

  // Input pattern: element i holds i + 1.
  int32_t *bufInA = bo_inA.map<int32_t *>();
  for (int i = 0; i < N; i++)
    bufInA[i] = i + 1;

  void *bufInstr = bo_instr.map<void *>();
  memcpy(bufInstr, instr_v.data(), instr_bytes);

  bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
  bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE);

  if (verbosity >= 1)
    std::cout << "Running Kernel." << std::endl;
  // NOTE(review): the meaning of opcode 3 is not visible in this file.
  unsigned int opcode = 3;
  auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out);

  // Do not validate the output of a run that did not finish successfully.
  const ert_cmd_state state = run.wait();
  if (state != ERT_CMD_STATE_COMPLETED) {
    std::cerr << "Kernel did not complete. Returned status: " << state
              << std::endl;
    return 1;
  }

  bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

  const uint32_t *bufOut = bo_out.map<uint32_t *>();

  // The expected output is the unchanged input pattern.
  int errors = 0;
  for (int i = 0; i < N; i++) {
    const uint32_t ref = static_cast<uint32_t>(i) + 1;
    if (bufOut[i] != ref) {
      errors++;
    }
  }

  if (!errors) {
    std::cout << std::endl << "PASS!" << std::endl << std::endl;
    return 0;
  } else {
    std::cout << std::endl
              << errors << " mismatches." << std::endl
              << std::endl;
    std::cout << std::endl << "fail." << std::endl << std::endl;
    return 1;
  }
}

0 comments on commit 4d613f9

Please sign in to comment.