Moved TF sessions used in Egamma PFID to global cache

EgammaDNNHelper was storing TF graphs globally by job, but TF sessions were owned by the GsfElectronProducer and GEDPhotonProducer. This commit moves the TF sessions into the EgammaDNNHelper, making them global by job.
cms-sw · Nov 11, 2024 · 83d1d98 · 83d1d98
1 parent b96fd02
commit 83d1d98
Show file tree

Hide file tree

Showing 8 changed files with 37 additions and 57 deletions.
diff --git a/RecoEgamma/EgammaElectronProducers/plugins/GsfElectronProducer.cc b/RecoEgamma/EgammaElectronProducers/plugins/GsfElectronProducer.cc
@@ -35,8 +35,7 @@ namespace {
                      const GsfElectronAlgo::HeavyObjectCache* hoc,
                      reco::VertexCollection const& vertices,
                      bool dnnPFidEnabled,
-                     float extetaboundary,
-                     const std::vector<tensorflow::Session*>& tfSessions) {
+                     float extetaboundary) {
     std::vector<GsfElectron::MvaOutput> mva_outputs(electrons.size());
     size_t iele = 0;
     for (auto& el : electrons) {
@@ -53,7 +52,7 @@ namespace {
     if (dnnPFidEnabled) {
       // Here send the list of electrons to the ElectronDNNEstimator and get back the values for all the electrons in one go
       LogDebug("GsfElectronProducer") << "Getting DNN PFId for ele";
-      const auto& dnn_ele_pfid = hoc->iElectronDNNEstimator->evaluate(electrons, tfSessions);
+      const auto& dnn_ele_pfid = hoc->iElectronDNNEstimator->evaluate(electrons);
       int jele = 0;
       for (auto& el : electrons) {
         const auto& [iModel, values] = dnn_ele_pfid[jele];
@@ -165,8 +164,6 @@ class GsfElectronProducer : public edm::stream::EDProducer<edm::GlobalCache<GsfE
   bool dnnPFidEnabled_;
   float extetaboundary_;
 
-  std::vector<tensorflow::Session*> tfSessions_;
-
   edm::ESGetToken<HcalPFCuts, HcalPFCutsRcd> hcalCutsToken_;
   bool cutsFromDB_;
 };
@@ -572,17 +569,9 @@ GsfElectronProducer::GsfElectronProducer(const edm::ParameterSet& cfg, const Gsf
       cfg.getParameter<edm::ParameterSet>("trkIsolHEEP03Cfg"),
       cfg.getParameter<edm::ParameterSet>("trkIsolHEEP04Cfg"),
       consumesCollector());
-
-  if (dnnPFidEnabled_) {
-    tfSessions_ = gcache->iElectronDNNEstimator->getSessions();
-  }
 }
 
-void GsfElectronProducer::endStream() {
-  for (auto session : tfSessions_) {
-    tensorflow::closeSession(session);
-  }
-}
+void GsfElectronProducer::endStream() {}
 
 void GsfElectronProducer::checkEcalSeedingParameters(edm::ParameterSet const& pset) {
   if (!pset.exists("SeedConfiguration")) {
@@ -761,8 +750,7 @@ void GsfElectronProducer::produce(edm::Event& event, const edm::EventSetup& setu
     for (auto& el : electrons) {
       el.setMvaInput(gsfMVAInputMap.find(el.gsfTrack())->second);  // set Run2 MVA inputs
     }
-    setMVAOutputs(
-        electrons, globalCache(), event.get(inputCfg_.vtxCollectionTag), dnnPFidEnabled_, extetaboundary_, tfSessions_);
+    setMVAOutputs(electrons, globalCache(), event.get(inputCfg_.vtxCollectionTag), dnnPFidEnabled_, extetaboundary_);
   }
 
   // all electrons

diff --git a/RecoEgamma/EgammaPhotonProducers/src/GEDPhotonProducer.cc b/RecoEgamma/EgammaPhotonProducers/src/GEDPhotonProducer.cc
@@ -222,7 +222,6 @@ class GEDPhotonProducer : public edm::stream::EDProducer<edm::GlobalCache<CacheD
 
   // DNN for PFID photon enabled
   bool dnnPFidEnabled_;
-  std::vector<tensorflow::Session*> tfSessions_;
 
   double ecaldrMax_;
   double ecaldrVetoBarrel_;
@@ -477,21 +476,14 @@ GEDPhotonProducer::GEDPhotonProducer(const edm::ParameterSet& config, const Cach
 
   const auto& pset_dnn = config.getParameter<edm::ParameterSet>("PhotonDNNPFid");
   dnnPFidEnabled_ = pset_dnn.getParameter<bool>("enabled");
-  if (dnnPFidEnabled_) {
-    tfSessions_ = gcache->photonDNNEstimator->getSessions();
-  }
 }
 
 std::unique_ptr<CacheData> GEDPhotonProducer::initializeGlobalCache(const edm::ParameterSet& config) {
   // this method is supposed to create, initialize and return a CacheData instance
   return std::make_unique<CacheData>(config);
 }
 
-void GEDPhotonProducer::endStream() {
-  for (auto session : tfSessions_) {
-    tensorflow::closeSession(session);
-  }
-}
+void GEDPhotonProducer::endStream() {}
 
 void GEDPhotonProducer::produce(edm::Event& theEvent, const edm::EventSetup& eventSetup) {
   using namespace edm;
@@ -1041,7 +1033,7 @@ void GEDPhotonProducer::fillPhotonCollection(edm::Event& evt,
   if (dnnPFidEnabled_) {
     // Here send the list of photons to the PhotonDNNEstimator and get back the values for all the photons in one go
     LogDebug("GEDPhotonProducer") << "Getting DNN PFId for photons";
-    const auto& dnn_photon_pfid = globalCache()->photonDNNEstimator->evaluate(outputPhotonCollection, tfSessions_);
+    const auto& dnn_photon_pfid = globalCache()->photonDNNEstimator->evaluate(outputPhotonCollection);
     size_t ipho = 0;
     for (auto& photon : outputPhotonCollection) {
       const auto& [iModel, values] = dnn_photon_pfid[ipho];

diff --git a/RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h b/RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h
@@ -41,20 +41,21 @@ namespace egammaTools {
   class EgammaDNNHelper {
   public:
     EgammaDNNHelper(const DNNConfiguration&, const ModelSelector& sel, const std::vector<std::string>& availableVars);
+    //Destructor to close TF sessions
+    ~EgammaDNNHelper();
 
-    std::vector<tensorflow::Session*> getSessions() const;
     // Function getting the input vector for a specific electron, already scaled
     // together with the model index it has to be used.
     // The model index is determined by the ModelSelector functor passed in the constructor
     // which has access to all the variables.
     std::pair<uint, std::vector<float>> getScaledInputs(const std::map<std::string, float>& variables) const;
 
     std::vector<std::pair<uint, std::vector<float>>> evaluate(
-        const std::vector<std::map<std::string, float>>& candidates,
-        const std::vector<tensorflow::Session*>& sessions) const;
+        const std::vector<std::map<std::string, float>>& candidates) const;
 
   private:
     void initTensorFlowGraphs();
+    void initTensorFlowSessions();
     void initScalerFiles(const std::vector<std::string>& availableVars);
 
     const DNNConfiguration cfg_;
@@ -65,6 +66,7 @@ namespace egammaTools {
     std::vector<uint> nInputs_;
 
     std::vector<std::unique_ptr<const tensorflow::GraphDef>> graphDefs_;
+    std::vector<std::unique_ptr<tensorflow::Session>> sessions_;
 
     // List of input variables for each of the model;
     std::vector<std::vector<ScalerConfiguration>> featuresMap_;

diff --git a/RecoEgamma/EgammaTools/src/EgammaDNNHelper.cc b/RecoEgamma/EgammaTools/src/EgammaDNNHelper.cc
@@ -9,31 +9,42 @@ using namespace egammaTools;
 EgammaDNNHelper::EgammaDNNHelper(const DNNConfiguration& cfg,
                                  const ModelSelector& modelSelector,
                                  const std::vector<std::string>& availableVars)
-    : cfg_(cfg), modelSelector_(modelSelector), nModels_(cfg_.modelsFiles.size()), graphDefs_(cfg_.modelsFiles.size()) {
+    : cfg_(cfg),
+      modelSelector_(modelSelector),
+      nModels_(cfg_.modelsFiles.size()),
+      graphDefs_(cfg_.modelsFiles.size()),
+      sessions_(cfg_.modelsFiles.size()) {
   initTensorFlowGraphs();
+  initTensorFlowSessions();
   initScalerFiles(availableVars);
 }
 
+EgammaDNNHelper::~EgammaDNNHelper() {
+  // Closing the sessions when the helper is destroyed at the end of the job.
+  for (const auto& session : sessions_) {
+    session->Close();
+  }
+}
+
 void EgammaDNNHelper::initTensorFlowGraphs() {
   // load the graph definition
   LogDebug("EgammaDNNHelper") << "Loading " << nModels_ << " graphs";
   size_t i = 0;
-  for (const auto& model_file : cfg_.modelsFiles) {
+  for (auto& model_file : cfg_.modelsFiles) {
     graphDefs_[i] =
         std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(model_file).fullPath()));
     i++;
   }
 }
 
-std::vector<tensorflow::Session*> EgammaDNNHelper::getSessions() const {
-  std::vector<tensorflow::Session*> sessions;
+void EgammaDNNHelper::initTensorFlowSessions() {
   LogDebug("EgammaDNNHelper") << "Starting " << nModels_ << " TF sessions";
-  sessions.reserve(graphDefs_.size());
+  size_t i = 0;
   for (const auto& graphDef : graphDefs_) {
-    sessions.push_back(tensorflow::createSession(graphDef.get()));
+    sessions_[i] = std::unique_ptr<tensorflow::Session>(tensorflow::createSession(graphDef.get()));
+    i++;
   }
   LogDebug("EgammaDNNHelper") << "TF sessions started";
-  return sessions;
 }
 
 void EgammaDNNHelper::initScalerFiles(const std::vector<std::string>& availableVars) {
@@ -99,8 +110,7 @@ std::pair<uint, std::vector<float>> EgammaDNNHelper::getScaledInputs(
 }
 
 std::vector<std::pair<uint, std::vector<float>>> EgammaDNNHelper::evaluate(
-    const std::vector<std::map<std::string, float>>& candidates,
-    const std::vector<tensorflow::Session*>& sessions) const {
+    const std::vector<std::map<std::string, float>>& candidates) const {
   /*
     Evaluate the PFID DNN for all the electrons/photons. 
     nModels_ are defined depending on modelIndex  --> we need to build N input tensors to evaluate
@@ -162,7 +172,7 @@ std::vector<std::pair<uint, std::vector<float>>> EgammaDNNHelper::evaluate(
       continue;  //Skip model witout inputs
     std::vector<tensorflow::Tensor> output;
     LogDebug("EgammaDNNHelper") << "Run model: " << m << " with " << counts[m] << "objects";
-    tensorflow::run(sessions[m], {{cfg_.inputTensorName, input_tensors[m]}}, {cfg_.outputTensorName}, &output);
+    tensorflow::run(sessions_[m].get(), {{cfg_.inputTensorName, input_tensors[m]}}, {cfg_.outputTensorName}, &output);
     // Get the output and save the ElectronDNNEstimator::outputDim numbers along with the ele index
     const auto& r = output[0].tensor<float, 2>();
     // Iterate on the list of elements in the batch --> many electrons

diff --git a/RecoEgamma/ElectronIdentification/interface/ElectronDNNEstimator.h b/RecoEgamma/ElectronIdentification/interface/ElectronDNNEstimator.h
@@ -13,15 +13,11 @@ class ElectronDNNEstimator {
 public:
   ElectronDNNEstimator(const egammaTools::DNNConfiguration&, const bool useEBModelInGap);
 
-  std::vector<tensorflow::Session*> getSessions() const;
-  ;
-
   // Function returning a map with all the possible variables and their name
   std::map<std::string, float> getInputsVars(const reco::GsfElectron& ele) const;
 
   // Evaluate the DNN on all the electrons with the correct model
-  std::vector<std::pair<uint, std::vector<float>>> evaluate(const reco::GsfElectronCollection& ele,
-                                                            const std::vector<tensorflow::Session*>& sessions) const;
+  std::vector<std::pair<uint, std::vector<float>>> evaluate(const reco::GsfElectronCollection& ele) const;
 
   // List of input variables names used to check the variables request as
   // inputs in a dynamic way from configuration file.

diff --git a/RecoEgamma/ElectronIdentification/src/ElectronDNNEstimator.cc b/RecoEgamma/ElectronIdentification/src/ElectronDNNEstimator.cc
@@ -47,8 +47,6 @@ ElectronDNNEstimator::ElectronDNNEstimator(const egammaTools::DNNConfiguration&
                  ElectronDNNEstimator::dnnAvaibleInputs),
       useEBModelInGap_(useEBModelInGap) {}
 
-std::vector<tensorflow::Session*> ElectronDNNEstimator::getSessions() const { return dnnHelper_.getSessions(); };
-
 const std::vector<std::string> ElectronDNNEstimator::dnnAvaibleInputs = {
     {"pt",
      "eta",
@@ -157,12 +155,12 @@ std::map<std::string, float> ElectronDNNEstimator::getInputsVars(const reco::Gsf
 }
 
 std::vector<std::pair<uint, std::vector<float>>> ElectronDNNEstimator::evaluate(
-    const reco::GsfElectronCollection& electrons, const std::vector<tensorflow::Session*>& sessions) const {
+    const reco::GsfElectronCollection& electrons) const {
   // Collect the map of variables for each candidate and call the dnnHelper
   // Scaling, model selection and running is performed in the helper
   std::vector<std::map<std::string, float>> inputs;
   for (const auto& ele : electrons) {
     inputs.push_back(getInputsVars(ele));
   }
-  return dnnHelper_.evaluate(inputs, sessions);
+  return dnnHelper_.evaluate(inputs);
 }
diff --git a/RecoEgamma/PhotonIdentification/interface/PhotonDNNEstimator.h b/RecoEgamma/PhotonIdentification/interface/PhotonDNNEstimator.h
@@ -14,15 +14,11 @@ class PhotonDNNEstimator {
 public:
   PhotonDNNEstimator(const egammaTools::DNNConfiguration&, const bool useEBModelInGap);
 
-  std::vector<tensorflow::Session*> getSessions() const;
-  ;
-
   // Function returning a map with all the possible variables and their name
   std::map<std::string, float> getInputsVars(const reco::Photon& ele) const;
 
   // Evaluate the DNN on all the electrons with the correct model
-  std::vector<std::pair<uint, std::vector<float>>> evaluate(const reco::PhotonCollection& ele,
-                                                            const std::vector<tensorflow::Session*>& sessions) const;
+  std::vector<std::pair<uint, std::vector<float>>> evaluate(const reco::PhotonCollection& ele) const;
 
   // List of input variables names used to check the variables request as
   // inputs in a dynamic way from configuration file.

diff --git a/RecoEgamma/PhotonIdentification/src/PhotonDNNEstimator.cc b/RecoEgamma/PhotonIdentification/src/PhotonDNNEstimator.cc
@@ -29,8 +29,6 @@ PhotonDNNEstimator::PhotonDNNEstimator(const egammaTools::DNNConfiguration& cfg,
                  PhotonDNNEstimator::dnnAvaibleInputs),
       useEBModelInGap_(useEBModelInGap) {}
 
-std::vector<tensorflow::Session*> PhotonDNNEstimator::getSessions() const { return dnnHelper_.getSessions(); };
-
 const std::vector<std::string> PhotonDNNEstimator::dnnAvaibleInputs = {{"pt",
                                                                         "eta",
                                                                         "hadTowOverEm",
@@ -67,12 +65,12 @@ std::map<std::string, float> PhotonDNNEstimator::getInputsVars(const reco::Photo
 }
 
 std::vector<std::pair<uint, std::vector<float>>> PhotonDNNEstimator::evaluate(
-    const reco::PhotonCollection& photons, const std::vector<tensorflow::Session*>& sessions) const {
+    const reco::PhotonCollection& photons) const {
   // Collect the map of variables for each candidate and call the dnnHelper
   // Scaling, model selection and running is performed in the helper
   std::vector<std::map<std::string, float>> inputs;
   for (const auto& photon : photons) {
     inputs.push_back(getInputsVars(photon));
   }
-  return dnnHelper_.evaluate(inputs, sessions);
+  return dnnHelper_.evaluate(inputs);
 }