From 8843f5d135afdbc1270fffbf662804894d9f7ba9 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Thu, 24 Aug 2023 09:44:21 +0400 Subject: [PATCH] [Snippets] Added MemorySolver support --- .../snippets/lowered/buffer_manager.hpp | 17 ++- .../snippets/lowered/pass/init_loops.hpp | 2 +- .../snippets/include/snippets/utils.hpp | 7 + .../snippets/src/lowered/buffer_manager.cpp | 123 ++++++++++++------ 4 files changed, 100 insertions(+), 49 deletions(-) diff --git a/src/common/snippets/include/snippets/lowered/buffer_manager.hpp b/src/common/snippets/include/snippets/lowered/buffer_manager.hpp index d9783d46ed49a4..97be3b83bda382 100644 --- a/src/common/snippets/include/snippets/lowered/buffer_manager.hpp +++ b/src/common/snippets/include/snippets/lowered/buffer_manager.hpp @@ -27,8 +27,6 @@ class BufferManager { */ int64_t allocate(); - void propagate_offset(const ExpressionPtr& buffer_expr, const size_t offset) const; - private: using BufferCluster = std::set; using BufferClusters = std::vector; @@ -37,7 +35,7 @@ class BufferManager { * @brief init Buffers as graph edges and other subgraph around the Buffers as Nodes using enumeration * Parameter * |--- LoopBegin Parameter - * | LoadReshape <- already allocated. Skip + * | LoadReshape <- already allocated (since after Parameter). Skip * | Store ---> Node <- (LoopBegin,...,LoopEnd) * |--- LoopEnd <- Buffer. Intermediate memory (edge) * Buffer Node <- (LoopBegin,...,LoopEnd) @@ -45,15 +43,24 @@ class BufferManager { * | ... 
*/ void init_clusters(const lowered::LinearIR& linear_ir); - + /** + * @brief init boxes for MemorySolver + */ + void init_boxes(); /** * @brief Default initialization: makes non-inplace Buffers and sets different Buffer IDs, offsets */ void initialization(); + /** + * @brief Set offset to Buffer op and propagate it to the connected memory access ops + */ + void set_buffer_offset(const ExpressionPtr& buffer_expr, const size_t offset) const; - BufferClusters buffer_clusters; + BufferClusters buffer_clusters = {}; + std::vector boxes = {}; size_t m_scratchpad_size = 0; bool m_enable_optimizations = true; + constexpr static int64_t alignment = 32; // 32 bytes }; } // namespace lowered diff --git a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp index 8e83e88ee60c2c..974c6a46bd071b 100644 --- a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp @@ -15,7 +15,7 @@ namespace pass { /** * @interface InitLoops - * @brief The pass initialize scheduling information in LoopInfo + * @brief The pass initializes scheduling information in LoopInfo * @ingroup snippets */ class InitLoops : public Pass { diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp index 8217e252d3eb56..bbd7eb0fdb179f 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -47,6 +47,13 @@ template constexpr bool everyone_is(T val, P item, Args... 
item_others) { return val == item && everyone_is(val, item_others...); } + +template +inline T div_up(const T a, const U b) { + assert(b); + return static_cast((a + b - 1) / b); +} + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/common/snippets/src/lowered/buffer_manager.cpp b/src/common/snippets/src/lowered/buffer_manager.cpp index 7cf58b56c273e8..6e5c38deffa821 100644 --- a/src/common/snippets/src/lowered/buffer_manager.cpp +++ b/src/common/snippets/src/lowered/buffer_manager.cpp @@ -10,6 +10,7 @@ #include "snippets/op/memory_access.hpp" #include "snippets/op/loop.hpp" #include "snippets/itt.hpp" +#include "snippets/utils.hpp" namespace ov { @@ -24,52 +25,24 @@ BufferManager::BufferManager(const lowered::LinearIR& linear_ir) { int64_t BufferManager::allocate() { initialization(); - return m_scratchpad_size; -} + if (m_enable_optimizations) { + // Initialize boxes for MemorySolver + init_boxes(); -void BufferManager::init_clusters(const LinearIR& linear_ir) { - int64_t order = 0; - for (const auto& expr : linear_ir) { - const auto op = expr->get_node(); - if (ov::is_type(op) || - ov::is_type(op) || - ov::is_type(op) || - ov::is_type(op)) - continue; + MemorySolver staticMemSolver(boxes); + m_scratchpad_size = static_cast(staticMemSolver.solve()) * alignment; - if (const auto buffer = ov::as_type_ptr(op)) { - ov::snippets::pass::SetTopologicalOrder(buffer, order++); - buffer_clusters.push_back(BufferCluster{expr}); // TODO: Add support of inplace - continue; - } - if (const auto loop_end = ov::as_type_ptr(op)) { - // LoopBegin should have the same order as the corresponding LoopEnd - const auto loop_begin = loop_end->get_loop_begin(); - ov::snippets::pass::SetTopologicalOrder(loop_begin, order); - ov::snippets::pass::SetTopologicalOrder(op, order++); - continue; + // Set offsets for Buffers (edges) + for (auto& box : boxes) { + for (auto& buffer : buffer_clusters[box.id]) { + int64_t offset = staticMemSolver.getOffset(box.id); + 
set_buffer_offset(buffer, offset * alignment); // alignment in byte + } } - - //bool is_node = false; // Meaning in MemoryManager bounds - //for (size_t i = 0; i < op->get_input_size() && !is_node; ++i) { - // is_node = is_node || ov::is_type(op->get_input_node_shared_ptr(i)); - //} - //for (size_t i = 0; i < op->get_output_size() && !is_node; ++i) { - // const auto target_consumers = op->get_output_target_inputs(i); - // for (const auto& in : target_consumers) { - // if (ov::is_type(in.get_node())) { - // is_node = true; - // break; - // } - // } - //} - - // if (is_node) { - // ov::snippets::pass::SetTopologicalOrder(op, order++); - // } } -} + return m_scratchpad_size; +} void BufferManager::initialization() { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BufferManager::initialization") @@ -85,7 +58,7 @@ void BufferManager::initialization() { continue; const auto byte_size = buffer->get_byte_size(); - propagate_offset(buffer_expr, buffer_offset); + set_buffer_offset(buffer_expr, buffer_offset); buffer->set_id(buffer_id); buffer_offset += byte_size; @@ -95,7 +68,71 @@ void BufferManager::initialization() { m_scratchpad_size = buffer_offset; } -void BufferManager::propagate_offset(const ExpressionPtr& buffer_expr, const size_t offset) const { +void BufferManager::init_clusters(const LinearIR& linear_ir) { + int64_t order = 0; + for (const auto& expr : linear_ir) { + const auto op = expr->get_node(); + if (const auto buffer = ov::as_type_ptr(op)) { + buffer_clusters.push_back(BufferCluster{expr}); // TODO: Add support of inplace + } + ov::snippets::pass::SetTopologicalOrder(op, order++); + } +} + +void BufferManager::init_boxes() { + const auto count = static_cast(buffer_clusters.size()); + for (int i = 0; i < count; i++) { + MemorySolver::Box box = { std::numeric_limits::max(), 0, 0, i }; + int64_t box_size = 0; + for (const auto& buffer_expr : buffer_clusters[i]) { + int e_start = 0, e_finish = 0; + const auto buffer = 
ov::as_type_ptr(buffer_expr->get_node()); + OPENVINO_ASSERT(buffer != nullptr, "BufferManager expects Buffer ops in clusters"); + const auto buffer_order = static_cast(ov::snippets::pass::GetTopologicalOrder(buffer)); + + // life finish time - order of LoopEnd / MemoryAccess ops + const auto buffer_outs = buffer_expr->get_output_port_connectors(); + for (const auto& buffer_out : buffer_outs) { + const auto consumers = buffer_out->get_consumers(); + for (const auto& consumer : consumers) { + const auto consumer_order = static_cast(ov::snippets::pass::GetTopologicalOrder(consumer.get_expr()->get_node())); + e_finish = std::max(e_finish, consumer_order); + } + } + e_start = e_finish; + + const auto buffer_ins = buffer_expr->get_input_port_connectors(); + for (const auto& buffer_in : buffer_ins) { + const auto& source = buffer_in->get_source(); + auto local_order = static_cast(ov::snippets::pass::GetTopologicalOrder(source.get_expr()->get_node())); + + const auto buffer_siblings = buffer_in->get_consumers(); + for (const auto& sibling : buffer_siblings) { + const auto loop_end = ov::as_type_ptr(sibling.get_expr()->get_node()); + if (!loop_end) + continue; + const auto loop_end_order = static_cast(ov::snippets::pass::GetTopologicalOrder(loop_end)); + if (loop_end_order < buffer_order) { + local_order = std::min(local_order, static_cast(ov::snippets::pass::GetTopologicalOrder(loop_end->get_loop_begin()))); + } + } + e_start = std::min(e_start, local_order); + } + + // TODO: Added support of Dynamic Buffers + auto buffer_size = static_cast(buffer->get_byte_size()); + box_size = std::max(buffer_size, box_size); + + box.start = std::min(e_start, box.start); + box.finish = std::max(e_finish, box.finish); + } + + box.size = utils::div_up(box_size, alignment); + boxes.push_back(box); + } +} + +void BufferManager::set_buffer_offset(const ExpressionPtr& buffer_expr, const size_t offset) const { // If Buffer has offset We set this offset in the connected MemoryAccess ops // to 
correctly read and write data, because all Buffers share a common data pointer into the buffer scratchpad