-
Notifications
You must be signed in to change notification settings - Fork 124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[L0] Enable Copy engine support with in-order command lists #2636
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,6 +170,64 @@ ur_result_t getEventsFromSyncPoints( | |
return UR_RESULT_SUCCESS; | ||
} | ||
|
||
/** | ||
* If necessary, it creates a signal event and appends it to the previous | ||
* command list (copy or compute), to indicate when it's finished executing. | ||
* @param[in] CommandType The type of the command. | ||
* @param[in] CommandBuffer The CommandBuffer where the command is appended. | ||
* @param[in] IsFirstNode A boolean indicating if the current node is at the | ||
* beginning of the command list. | ||
* @param[in] IsCopy A boolean indicating if the current command uses copy | ||
* engine. | ||
* @param[out] ZeSignalPrevCommandEvent The event which signals when the | ||
* previous command appended to a different command list is finished executing. | ||
* @return UR_RESULT_SUCCESS or an error code on failure | ||
*/ | ||
ur_result_t createSyncPointBetweenCopyAndCompute( | ||
ur_command_t CommandType, ur_exp_command_buffer_handle_t CommandBuffer, | ||
bool IsFirstNode, bool IsCopy, | ||
ze_event_handle_t &ZeSignalPrevCommandEvent) { | ||
if (IsFirstNode) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we avoid using |
||
CommandBuffer->MCopyCommandListEmpty = !IsCopy; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this bit of code needed? Isn't |
||
CommandBuffer->MComputeCommandListEmpty = IsCopy; | ||
CommandBuffer->MWasPrevCopyCommandList = IsCopy; | ||
return UR_RESULT_SUCCESS; | ||
} else if (CommandBuffer->MWasPrevCopyCommandList == IsCopy) { | ||
return UR_RESULT_SUCCESS; | ||
} | ||
|
||
ur_event_handle_t SignalPrevCommandEvent = nullptr; | ||
UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/, | ||
false /*IsMultiDevice*/, false, &SignalPrevCommandEvent, | ||
false /*CounterBasedEventEnabled*/, | ||
!CommandBuffer->IsProfilingEnabled, | ||
false /*InterruptBasedEventEnabled*/)); | ||
SignalPrevCommandEvent->CommandType = CommandType; | ||
|
||
/* | ||
* If the current CommandType is different from the one used last, we | ||
* need to append ZeSignalPrevCommandEvent to the latter to know when it's | ||
* finished executing. | ||
*/ | ||
ZeSignalPrevCommandEvent = SignalPrevCommandEvent->ZeEvent; | ||
if (CommandBuffer->MWasPrevCopyCommandList && !IsCopy) { | ||
ZE2UR_CALL(zeCommandListAppendSignalEvent, | ||
(CommandBuffer->ZeCopyCommandList, ZeSignalPrevCommandEvent)); | ||
CommandBuffer->MWasPrevCopyCommandList = false; | ||
} else { | ||
ZE2UR_CALL(zeCommandListAppendSignalEvent, | ||
(CommandBuffer->ZeComputeCommandList, ZeSignalPrevCommandEvent)); | ||
CommandBuffer->MWasPrevCopyCommandList = true; | ||
} | ||
|
||
// Get sync point and register the event with it, so that the event is reset between executions. | ||
ur_exp_command_buffer_sync_point_t SyncPoint = | ||
CommandBuffer->getNextSyncPoint(); | ||
CommandBuffer->registerSyncPoint(SyncPoint, SignalPrevCommandEvent); | ||
Comment on lines
+223
to
+226
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is every call to this function inside a check for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I follow. AFAIK Registering I might be misunderstanding the purpose of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is more what I was getting at. I was trying to ask if we need this sync point if the UR user doesn't care about sync points. However, you've answered my question by saying that we need to do this to cache the L0 event and reset it later. So I guess my feedback is to update the code comment to append ", so that the event is reset between executions".
|
||
|
||
return UR_RESULT_SUCCESS; | ||
} | ||
|
||
/** | ||
* If needed, creates a sync point for a given command and returns the L0 | ||
* events associated with the sync point. | ||
|
@@ -225,7 +283,7 @@ ur_result_t createSyncPointAndGetZeEvents( | |
return UR_RESULT_SUCCESS; | ||
} | ||
|
||
// Shared by all memory read/write/copy PI interfaces. | ||
// Shared by all memory read/write/copy UR interfaces. | ||
// Helper function for common code when enqueuing memory operations to a command | ||
// buffer. | ||
ur_result_t enqueueCommandBufferMemCopyHelper( | ||
|
@@ -241,9 +299,25 @@ ur_result_t enqueueCommandBufferMemCopyHelper( | |
CommandType, CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList, | ||
false, RetSyncPoint, ZeEventList, ZeLaunchEvent)); | ||
|
||
// The chooseCommandList() will modify MComputeCommandListEmpty and | ||
konradkusiak97 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// MCopyCommandListEmpty so isFirstNode needs to be called beforehand. | ||
bool IsFirstNode{CommandBuffer->isFirstNode()}; | ||
|
||
ze_command_list_handle_t ZeCommandList = | ||
CommandBuffer->chooseCommandList(PreferCopyEngine); | ||
|
||
// Create a sync point if both Copy and Compute in-order command lists are | ||
// used. | ||
if (CommandBuffer->IsInOrderCmdList && | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This bit of code is copy pasted a lot. It can probably be refactored into its own function. |
||
ZeCommandList == CommandBuffer->ZeCopyCommandList) { | ||
ze_event_handle_t ZeSignalPrevCommandEvent = nullptr; | ||
UR_CALL(createSyncPointBetweenCopyAndCompute(CommandType, CommandBuffer, | ||
IsFirstNode, true /*isCopy=*/, | ||
ZeSignalPrevCommandEvent)); | ||
if (ZeSignalPrevCommandEvent) { | ||
ZeEventList.push_back(ZeSignalPrevCommandEvent); | ||
} | ||
} | ||
ZE2UR_CALL(zeCommandListAppendMemoryCopy, | ||
(ZeCommandList, Dst, Src, Size, ZeLaunchEvent, ZeEventList.size(), | ||
getPointerFromVector(ZeEventList))); | ||
|
@@ -299,9 +373,26 @@ ur_result_t enqueueCommandBufferMemCopyRectHelper( | |
CommandType, CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList, | ||
false, RetSyncPoint, ZeEventList, ZeLaunchEvent)); | ||
|
||
// The chooseCommandList() will modify MComputeCommandListEmpty and | ||
// MCopyCommandListEmpty so isFirstNode needs to be called beforehand. | ||
bool IsFirstNode{CommandBuffer->isFirstNode()}; | ||
|
||
ze_command_list_handle_t ZeCommandList = | ||
CommandBuffer->chooseCommandList(PreferCopyEngine); | ||
|
||
// Create a sync point if both Copy and Compute in-order command lists are | ||
// used. | ||
if (CommandBuffer->IsInOrderCmdList && | ||
ZeCommandList == CommandBuffer->ZeCopyCommandList) { | ||
ze_event_handle_t ZeSignalPrevCommandEvent = nullptr; | ||
UR_CALL(createSyncPointBetweenCopyAndCompute(CommandType, CommandBuffer, | ||
IsFirstNode, true /*isCopy=*/, | ||
ZeSignalPrevCommandEvent)); | ||
if (ZeSignalPrevCommandEvent) { | ||
ZeEventList.push_back(ZeSignalPrevCommandEvent); | ||
} | ||
} | ||
|
||
ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, | ||
(ZeCommandList, Dst, &ZeDstRegion, DstPitch, DstSlicePitch, Src, | ||
&ZeSrcRegion, SrcPitch, SrcSlicePitch, ZeLaunchEvent, | ||
|
@@ -331,9 +422,26 @@ ur_result_t enqueueCommandBufferFillHelper( | |
UR_CALL( | ||
preferCopyEngineForFill(CommandBuffer, PatternSize, PreferCopyEngine)); | ||
|
||
// The chooseCommandList() will modify MComputeCommandListEmpty and | ||
// MCopyCommandListEmpty so isFirstNode needs to be called beforehand. | ||
bool IsFirstNode{CommandBuffer->isFirstNode()}; | ||
|
||
ze_command_list_handle_t ZeCommandList = | ||
CommandBuffer->chooseCommandList(PreferCopyEngine); | ||
|
||
// Create a sync point if both Copy and Compute in-order command lists are | ||
// used. | ||
if (CommandBuffer->IsInOrderCmdList && | ||
ZeCommandList == CommandBuffer->ZeCopyCommandList) { | ||
ze_event_handle_t ZeSignalPrevCommandEvent = nullptr; | ||
UR_CALL(createSyncPointBetweenCopyAndCompute(CommandType, CommandBuffer, | ||
IsFirstNode, true /*isCopy=*/, | ||
ZeSignalPrevCommandEvent)); | ||
if (ZeSignalPrevCommandEvent) { | ||
ZeEventList.push_back(ZeSignalPrevCommandEvent); | ||
} | ||
} | ||
|
||
ZE2UR_CALL(zeCommandListAppendMemoryFill, | ||
(ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeLaunchEvent, | ||
ZeEventList.size(), getPointerFromVector(ZeEventList))); | ||
|
@@ -489,12 +597,13 @@ void ur_exp_command_buffer_handle_t_::registerSyncPoint( | |
|
||
ze_command_list_handle_t | ||
ur_exp_command_buffer_handle_t_::chooseCommandList(bool PreferCopyEngine) { | ||
if (PreferCopyEngine && this->useCopyEngine() && !this->IsInOrderCmdList) { | ||
if (PreferCopyEngine && useCopyEngine()) { | ||
// We indicate that ZeCopyCommandList contains commands to be submitted. | ||
this->MCopyCommandListEmpty = false; | ||
return this->ZeCopyCommandList; | ||
MCopyCommandListEmpty = false; | ||
return ZeCopyCommandList; | ||
} | ||
return this->ZeComputeCommandList; | ||
MComputeCommandListEmpty = false; | ||
return ZeComputeCommandList; | ||
} | ||
|
||
ur_result_t ur_exp_command_buffer_handle_t_::getFenceForQueue( | ||
|
@@ -661,7 +770,7 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, | |
// the current implementation only uses the main copy engine and does not use | ||
// the link engine even if available. | ||
if (Device->hasMainCopyEngine()) { | ||
UR_CALL(createMainCommandList(Context, Device, false, false, true, | ||
UR_CALL(createMainCommandList(Context, Device, IsInOrder, false, true, | ||
ZeCopyCommandList)); | ||
} | ||
|
||
|
@@ -827,18 +936,18 @@ finalizeWaitEventPath(ur_exp_command_buffer_handle_t CommandBuffer) { | |
(CommandBuffer->ZeCommandListResetEvents, | ||
CommandBuffer->ExecutionFinishedEvent->ZeEvent)); | ||
|
||
// Reset the L0 events we use for command-buffer sync-points to the | ||
// non-signaled state. This is required for multiple submissions. | ||
for (auto &Event : CommandBuffer->ZeEventsList) { | ||
ZE2UR_CALL(zeCommandListAppendEventReset, | ||
(CommandBuffer->ZeCommandListResetEvents, Event)); | ||
} | ||
|
||
if (CommandBuffer->IsInOrderCmdList) { | ||
ZE2UR_CALL(zeCommandListAppendSignalEvent, | ||
(CommandBuffer->ZeComputeCommandList, | ||
CommandBuffer->ExecutionFinishedEvent->ZeEvent)); | ||
} else { | ||
// Reset the L0 events we use for command-buffer sync-points to the | ||
// non-signaled state. This is required for multiple submissions. | ||
for (auto &Event : CommandBuffer->ZeEventsList) { | ||
ZE2UR_CALL(zeCommandListAppendEventReset, | ||
(CommandBuffer->ZeCommandListResetEvents, Event)); | ||
} | ||
|
||
// Wait for all the user added commands to complete, and signal the | ||
// command-buffer signal-event when they are done. | ||
ZE2UR_CALL(zeCommandListAppendBarrier, | ||
|
@@ -1084,6 +1193,18 @@ ur_result_t urCommandBufferAppendKernelLaunchExp( | |
UR_COMMAND_KERNEL_LAUNCH, CommandBuffer, NumSyncPointsInWaitList, | ||
SyncPointWaitList, false, RetSyncPoint, ZeEventList, ZeLaunchEvent)); | ||
|
||
// Create a sync point if both Copy and Compute in-order command lists are | ||
// used. | ||
if (CommandBuffer->IsInOrderCmdList && CommandBuffer->ZeCopyCommandList) { | ||
ze_event_handle_t ZeSignalPrevCommandEvent = nullptr; | ||
UR_CALL(createSyncPointBetweenCopyAndCompute( | ||
UR_COMMAND_KERNEL_LAUNCH, CommandBuffer, CommandBuffer->isFirstNode(), | ||
false /*isCopy=*/, ZeSignalPrevCommandEvent)); | ||
if (ZeSignalPrevCommandEvent) { | ||
ZeEventList.push_back(ZeSignalPrevCommandEvent); | ||
} | ||
} | ||
|
||
ZE2UR_CALL(zeCommandListAppendLaunchKernel, | ||
(CommandBuffer->ZeComputeCommandList, ZeKernel, | ||
&ZeThreadGroupDimensions, ZeLaunchEvent, ZeEventList.size(), | ||
|
@@ -1315,7 +1436,20 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp( | |
std::ignore = Flags; | ||
|
||
if (CommandBuffer->IsInOrderCmdList) { | ||
// Add the prefetch command to the command-buffer. | ||
// Create a sync point if both Copy and Compute in-order command lists are | ||
// used. | ||
if (CommandBuffer->ZeCopyCommandList) { | ||
ze_event_handle_t ZeSignalPrevCommandEvent = nullptr; | ||
UR_CALL(createSyncPointBetweenCopyAndCompute( | ||
UR_COMMAND_KERNEL_LAUNCH, CommandBuffer, CommandBuffer->isFirstNode(), | ||
false /*isCopy=*/, ZeSignalPrevCommandEvent)); | ||
if (ZeSignalPrevCommandEvent) { | ||
ZE2UR_CALL(zeCommandListAppendWaitOnEvents, | ||
(CommandBuffer->ZeComputeCommandList, 1, | ||
&ZeSignalPrevCommandEvent)); | ||
} | ||
} | ||
// Add the prefetch command to the command buffer. | ||
// Note that L0 does not handle migration flags. | ||
ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, | ||
(CommandBuffer->ZeComputeCommandList, Mem, Size)); | ||
|
@@ -1342,6 +1476,7 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp( | |
ZE2UR_CALL(zeCommandListAppendSignalEvent, | ||
(CommandBuffer->ZeComputeCommandList, ZeLaunchEvent)); | ||
} | ||
CommandBuffer->MComputeCommandListEmpty = false; | ||
|
||
return UR_RESULT_SUCCESS; | ||
} | ||
|
@@ -1385,6 +1520,19 @@ ur_result_t urCommandBufferAppendUSMAdviseExp( | |
ze_memory_advice_t ZeAdvice = static_cast<ze_memory_advice_t>(Value); | ||
|
||
if (CommandBuffer->IsInOrderCmdList) { | ||
// Create a sync point if both Copy and Compute in-order command lists are | ||
// used. | ||
if (CommandBuffer->ZeCopyCommandList) { | ||
ze_event_handle_t ZeSignalPrevCommandEvent = nullptr; | ||
UR_CALL(createSyncPointBetweenCopyAndCompute( | ||
UR_COMMAND_KERNEL_LAUNCH, CommandBuffer, CommandBuffer->isFirstNode(), | ||
false /*isCopy=*/, ZeSignalPrevCommandEvent)); | ||
if (ZeSignalPrevCommandEvent) { | ||
ZE2UR_CALL(zeCommandListAppendWaitOnEvents, | ||
(CommandBuffer->ZeComputeCommandList, 1, | ||
&ZeSignalPrevCommandEvent)); | ||
} | ||
} | ||
ZE2UR_CALL(zeCommandListAppendMemAdvise, | ||
(CommandBuffer->ZeComputeCommandList, | ||
CommandBuffer->Device->ZeDevice, Mem, Size, ZeAdvice)); | ||
|
@@ -1410,6 +1558,7 @@ ur_result_t urCommandBufferAppendUSMAdviseExp( | |
ZE2UR_CALL(zeCommandListAppendSignalEvent, | ||
(CommandBuffer->ZeComputeCommandList, ZeLaunchEvent)); | ||
} | ||
CommandBuffer->MComputeCommandListEmpty = false; | ||
|
||
return UR_RESULT_SUCCESS; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.