#if PROTOBUF_VERSION != 5029002 #error"Protobuf C++ gencode is built with an incompatible version of" #error"Protobuf C++ headers/runtime. See" #error"https://protobuf.dev/support/cross-version-runtime-guarantee/#cpp" ... ... #endif// ps: 这个endif在文件末尾
// Enum that represents the type of optimization it is.
enum PassType {
  // Class of optimizations that fuses operations.
  Fuse = 0,
  // Class of optimizations that removes useless operations.
  Nop = 1,
  // Class of optimizations that includes some form of separation.
  Separate = 2,
  // Immutable pass, also sometimes referred to as an analysis pass.
  Immutable = 3,
  // Class of optimizations that replaces nodes with other.
  Replace = 4,
  // Other type of pass.
  Other = 5
};

// Enum that represents what a pass optimizes for.
enum PassOptimizationType {
  // Is not optimizing anything. Most likely will be used in an immutable pass.
  None = 0,
  // Optimizes for compute.
  Compute = 1,
  // Optimizes for memory.
  Memory = 2,
  // Optimizes for both compute and memory.
  ComputeMemory = 3,
  // Optimizes for stability (e.g. log-sum-exp trick).
  Stability = 4
};

// Enum that represents whether a single run of a pass is sufficient.
enum PassEfficiency {
  // A partially efficient optimization pass cannot guarantee that running two
  // consecutive passes will return the same result as running a single pass.
  Partial = 0,
  // A completely efficient optimization guarantees that running two
  // consecutive passes is equivalent to running a single pass.
  Complete = 1
};
// Base struct representing result of a pass.
// The destructor is virtual so that derived analysis objects can be destroyed
// through a PostPassAnalysis pointer.
struct PostPassAnalysis {
  virtual ~PostPassAnalysis() = default;
};
// Enum that represents the return type of the analysis.
enum PassAnalysisType {
  // An empty analysis is returned. Most likely will return PostPassAnalysis.
  Empty = 0,
  // A count based analysis is returned. Most likely of type
  // CountBasedPassAnalysis
  CountBased = 1
};
// Pass Analysis done after a predicate based pass. structCountBasedPassAnalysis : PostPassAnalysis { Pass *pass; ... boolgraphChanged(); boolnumSucceededTransforms();
...
// Whether or not a repeated application of the pass might be useful. boolfixedPointOptimizationNeeded(){ returnthis->graphChanged() && pass->getPassEfficiency() == PassEfficiency::Partial; } ... };
这里比较重要的是 runTransform 是如何修改图的,这个需要之后对 ONNX 的 IR, Node, Graph 等内容进行分析
FullGraphBasedPass
在图上的优化:
1 2 3 4 5 6 7 8 9
// The most general pass which allows the user to run a pass given only a graph. classFullGraphBasedPass : public Pass { public: explicitFullGraphBasedPass(PassType pass_type, PassEfficiency pass_efficiency, PassOptimizationType pass_optimization_type) : Pass(pass_type, pass_efficiency, pass_optimization_type) {} ~FullGraphBasedPass() override; };
unsignedintEliminateDead(Graph& graph){ unsignedint nodes_removed = 0; auto nodes = graph.nodes().reverse(); for (auto it = nodes.begin(); it != nodes.end(); it++) { auto node = *it; if (!node->hasUses()) { nodes_removed++; it.destroyCurrent(); } } return nodes_removed; }
可以看到,nodes 是图中节点序列的逆序(`graph.nodes().reverse()`);若 `graph.nodes()` 本身按拓扑序排列,则这里是按逆拓扑序遍历,即先访问使用者、再访问被使用的节点
ONNX IR
位于源文件 onnx/ir.h, 有三个关键结构 Value, Node, Graph
1 2 3 4 5 6 7 8 9 10 11 12 13
// Graph represents one "function" of computation.
// It uses a simple ownership model where the graph owns all the nodes inside it.
// All references inside the graph are raw pointers.
// Destroying the Graph will invalidate any pointers to nodes in the graph.
struct Graph;

// Node is the base class of the IR graph. It represents one computation
// and dependencies on a list of Values. The "prim-ops", so to speak.
struct Node;

// A Value represents an input or output to node that is either a
// Tensor or an opaque Handle object, as determined by type().
struct Value;
Value 持有定义本身的 Node 的引用,即一个 Value 是一个 Node 的 output 之一,即 use-def关系中的 def关系
std::unordered_set<const Node*> all_nodes; std::unordered_set<const Value*> all_values; ... Node* const output_; Node* const input_; ... Node* const initializer_node_; // Create an independent node list for those initializers do not exist in input std::vector<Tensor> initializers_; std::vector<std::string> initializer_names_;
... };
ONNX IR 的组织模式和传统编译器十分甚至九分的相似:function, value, def-use 等概念. 现在还有最后一块拼图,value 如何反向引用到其uses