From 08f81501644a1a777ebf5b6d7e0ce0c26314e64e Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 17 Apr 2026 20:55:21 +0200 Subject: [PATCH 1/4] ITS: add memory stats Signed-off-by: Felix Schlepper --- .../include/ITStracking/BoundedAllocator.h | 158 ++++++++++++++---- .../tracking/include/ITStracking/Tracker.h | 11 +- .../tracking/include/ITStracking/Vertexer.h | 4 + Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 26 +-- .../ITS/tracking/src/TrackingInterface.cxx | 25 +-- .../ITSMFT/ITS/tracking/src/Vertexer.cxx | 10 +- 6 files changed, 160 insertions(+), 74 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h index 91d5edeedcdb1..01f5674fc611b 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h @@ -22,13 +22,18 @@ #include #include +#if !defined(__HIPCC__) && !defined(__CUDACC__) +#include +#include +#include "Framework/Logger.h" +#endif #include "ITStracking/ExternalAllocator.h" - -#include "GPUCommonLogger.h" +#include "ITStracking/Constants.h" namespace o2::its { +// #define BOUNDED_MR_STATS class BoundedMemoryResource final : public std::pmr::memory_resource { public: @@ -36,49 +41,71 @@ class BoundedMemoryResource final : public std::pmr::memory_resource { public: MemoryLimitExceeded(size_t attempted, size_t used, size_t max) - : mAttempted(attempted), mUsed(used), mMax(max) {} - const char* what() const noexcept final { - static thread_local char msg[256]; - if (mAttempted != 0) { - snprintf(msg, sizeof(msg), - "Reached set memory limit (attempted: %zu, used: %zu, max: %zu)", - mAttempted, mUsed, mMax); + char buf[256]; + if (attempted != 0) { + (void)snprintf(buf, sizeof(buf), "Reached set memory limit (attempted: %zu, used: %zu, max: %zu)", attempted, used, max); } else { - snprintf(msg, sizeof(msg), - "New set maximum below current used (newMax: %zu, used: %zu)", - mMax, mUsed); + (void)snprintf(buf, sizeof(buf), "New set maximum below current used (newMax: %zu, used: %zu)", max, used); } - return msg; + mMsg = buf; } + const char* what() const noexcept final { return mMsg.c_str(); } private: - size_t mAttempted{0}, mUsed{0}, mMax{0}; + std::string mMsg; }; - BoundedMemoryResource(size_t maxBytes = std::numeric_limits::max(), std::pmr::memory_resource* upstream = std::pmr::get_default_resource()) + BoundedMemoryResource(size_t maxBytes = std::numeric_limits::max(), + std::pmr::memory_resource* upstream = std::pmr::get_default_resource()) : mMaxMemory(maxBytes), mUpstream(upstream) {} - BoundedMemoryResource(ExternalAllocator* alloc) : mAdaptor(std::make_unique(alloc)), mUpstream(mAdaptor.get()) {} + + BoundedMemoryResource(ExternalAllocator* alloc, + size_t maxBytes = std::numeric_limits::max()) + : mMaxMemory(maxBytes), + mAdaptor(std::make_unique(alloc)), + mUpstream(mAdaptor.get()) {} void* do_allocate(size_t bytes, size_t alignment) final { - size_t new_used{0}, current_used{mUsedMemory.load(std::memory_order_relaxed)}; + size_t new_used{0}; + size_t current_used{mUsedMemory.load(std::memory_order_relaxed)}; do { new_used = current_used + bytes; - if (new_used > mMaxMemory) { - ++mCountThrow; - throw MemoryLimitExceeded(new_used, current_used, mMaxMemory); + if (new_used > mMaxMemory.load(std::memory_order_relaxed)) { + mCountThrow.fetch_add(1, std::memory_order_relaxed); + throw MemoryLimitExceeded(new_used, current_used, + mMaxMemory.load(std::memory_order_relaxed)); } } while (!mUsedMemory.compare_exchange_weak(current_used, new_used, std::memory_order_acq_rel, std::memory_order_relaxed)); + void* p{nullptr}; try { p = mUpstream->allocate(bytes, alignment); } catch (...) { mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed); +#ifdef BOUNDED_MR_STATS + mStats.upstreamFailures.fetch_add(1, std::memory_order_relaxed); +#endif throw; } + +#ifdef BOUNDED_MR_STATS + size_t peak = mStats.peak.load(std::memory_order_relaxed); + while (new_used > peak && + !mStats.peak.compare_exchange_weak(peak, new_used, + std::memory_order_relaxed)) { + } + mStats.live.fetch_add(1, std::memory_order_relaxed); + mStats.nAlloc.fetch_add(1, std::memory_order_relaxed); + mStats.totalAlloc.fetch_add(bytes, std::memory_order_relaxed); + + size_t ma = mStats.maxAlign.load(std::memory_order_relaxed); + while (alignment > ma && !mStats.maxAlign.compare_exchange_weak(ma, alignment, std::memory_order_relaxed)) { + } +#endif return p; } @@ -86,6 +113,11 @@ class BoundedMemoryResource final : public std::pmr::memory_resource { mUpstream->deallocate(p, bytes, alignment); mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed); +#ifdef BOUNDED_MR_STATS + mStats.live.fetch_sub(1, std::memory_order_relaxed); + mStats.nFree.fetch_add(1, std::memory_order_relaxed); + mStats.totalFreed.fetch_add(bytes, std::memory_order_relaxed); +#endif } bool do_is_equal(const std::pmr::memory_resource& other) const noexcept final @@ -93,38 +125,94 @@ class BoundedMemoryResource final : public std::pmr::memory_resource return this == &other; } - size_t getUsedMemory() const noexcept { return mUsedMemory.load(); } - size_t getMaxMemory() const noexcept { return mMaxMemory; } + [[nodiscard]] size_t getUsedMemory() const noexcept + { + return mUsedMemory.load(std::memory_order_relaxed); + } + [[nodiscard]] size_t getMaxMemory() const noexcept + { + return mMaxMemory.load(std::memory_order_relaxed); + } + [[nodiscard]] size_t getThrowCount() const noexcept + { + return mCountThrow.load(std::memory_order_relaxed); + } + void setMaxMemory(size_t max) { - if (max == mMaxMemory) { + size_t current = mMaxMemory.load(std::memory_order_relaxed); + if (max == current) { return; } - size_t used = mUsedMemory.load(std::memory_order_acquire); - if (used > max) { - ++mCountThrow; - throw MemoryLimitExceeded(0, used, max); + for (;;) { + size_t used = mUsedMemory.load(std::memory_order_acquire); + if (used > max) { + mCountThrow.fetch_add(1, std::memory_order_relaxed); + throw MemoryLimitExceeded(0, used, max); + } + if (mMaxMemory.compare_exchange_weak(current, max, + std::memory_order_release, + std::memory_order_relaxed)) { + return; + } + if (current == max) { + return; + } } - mMaxMemory.store(max, std::memory_order_release); } - void print() const +#if !defined(__HIPCC__) && !defined(__CUDACC__) + std::string asString() const { -#if !defined(GPUCA_GPUCODE_DEVICE) - constexpr double GB{1024 * 1024 * 1024}; - auto throw_ = mCountThrow.load(std::memory_order_relaxed); - auto used = static_cast(mUsedMemory.load(std::memory_order_relaxed)); - LOGP(info, "maxthrow={} maxmem={:.2f} GB used={:.2f} ({:.2f}%)", - throw_, (double)mMaxMemory / GB, used / GB, 100. * used / (double)mMaxMemory); + const auto throw_ = mCountThrow.load(std::memory_order_relaxed); + const auto used = static_cast(mUsedMemory.load(std::memory_order_relaxed)); + const auto maxm = mMaxMemory.load(std::memory_order_relaxed); + std::string ret; + if (maxm == std::numeric_limits::max()) { + ret += std::format("maxthrow={} maxmem=unbounded used={:.2f} GB", throw_, used / constants::GB); + } else { + ret += std::format("maxthrow={} maxmem={:.2f} GB used={:.2f} GB ({:.2f}%)", throw_, (double)maxm / constants::GB, used / constants::GB, 100.0 * used / (double)maxm); + } +#ifdef BOUNDED_MR_STATS + ret += std::format(" peak={:.2f} GB live={} nAlloc={} nFree={} totalAlloc={:.2f} GB totalFreed={:.2f} GB maxAlign={} upstreamFail={}", + (float)mStats.peak.load(std::memory_order_relaxed) / constants::GB, + mStats.live.load(std::memory_order_relaxed), + mStats.nAlloc.load(std::memory_order_relaxed), + mStats.nFree.load(std::memory_order_relaxed), + (float)mStats.totalAlloc.load(std::memory_order_relaxed) / constants::GB, + (float)mStats.totalFreed.load(std::memory_order_relaxed) / constants::GB, + mStats.maxAlign.load(std::memory_order_relaxed), + mStats.upstreamFailures.load(std::memory_order_relaxed)); #endif + return ret; } + void print() const + { + LOGP(info, "{}", asString()); + } +#endif + private: std::atomic mMaxMemory{std::numeric_limits::max()}; std::atomic mCountThrow{0}; std::atomic mUsedMemory{0}; std::unique_ptr mAdaptor{nullptr}; std::pmr::memory_resource* mUpstream{nullptr}; + +#ifdef BOUNDED_MR_STATS + struct Stats { + std::atomic peak{0}; + std::atomic live{0}; + std::atomic nAlloc{0}; + std::atomic nFree{0}; + std::atomic totalAlloc{0}; + std::atomic totalFreed{0}; + std::atomic maxAlign{0}; + std::atomic upstreamFailures{0}; + }; + Stats mStats{}; +#endif }; template diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index a1a0bf7249a21..ad8ea5b3b56af 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -54,7 +53,7 @@ class Tracker void adoptTimeFrame(TimeFrame& tf); - void clustersToTracks( + float clustersToTracks( const LogFunc& = [](const std::string& s) { std::cout << s << '\n'; }, const LogFunc& = [](const std::string& s) { std::cerr << s << '\n'; }); @@ -78,7 +77,7 @@ class Tracker void sortTracks(); template - float evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args); + float evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, const LogFunc& logger, F&&... args); TrackerTraits* mTraits = nullptr; /// Observer pointer, not owned by this class TimeFrame* mTimeFrame = nullptr; /// Observer pointer, not owned by this class @@ -106,7 +105,7 @@ class Tracker template template -float Tracker::evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args) +float Tracker::evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, const LogFunc& logger, F&&... args) { float diff{0.f}; @@ -140,6 +139,10 @@ float Tracker::evaluateTask(void (Tracker::*task)(T...), std:: (this->*task)(std::forward(args)...); } + if (mTrkParams[iteration].PrintMemory) { + LOGP(info, "iter:{}:{}: {}", iteration, StateNames[mCurState], mMemoryPool->asString()); + } + return diff; } diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h index a045ba1639b13..f1cf081473264 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h @@ -152,6 +152,10 @@ float Vertexer::evaluateTask(void (Vertexer::*task)(T...), std (this->*task)(std::forward(args)...); } + if (mVertParams[iteration].PrintMemory) { + LOGP(info, "iter:{}:{}: {}", iteration, StateNames[mCurState], mMemoryPool->asString()); + } + return diff; } diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index fa881789af296..3e91788c9881c 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -34,11 +34,11 @@ Tracker::Tracker(TrackerTraits* traits) : mTraits(traits) } template -void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& error) +float Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& error) { LogFunc evalLog = [](const std::string&) {}; - double total{0}; + float total{0}; mTraits->updateTrackingParameters(mTrkParams); int maxNvertices{-1}; @@ -71,12 +71,13 @@ void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& er if (iteration == 3 && mTrkParams[0].DoUPCIteration) { mTimeFrame->useUPCMask(); } - float timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; + float timeFrame{0.}, timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; size_t nTracklets{0}, nCells{0}, nNeighbours{0}; int nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; iVertex = std::min(maxNvertices, 0); logger(std::format("==== ITS {} Tracking iteration {} summary ====", mTraits->getName(), iteration)); - total += evaluateTask(&Tracker::initialiseTimeFrame, StateNames[mCurState = TFInit], iteration, logger, iteration); + total += timeFrame = evaluateTask(&Tracker::initialiseTimeFrame, StateNames[mCurState = TFInit], iteration, evalLog, iteration); + logger(std::format(" - TimeFrame initialisation completed in {:.2f} ms", timeFrame)); do { timeTracklets += evaluateTask(&Tracker::computeTracklets, StateNames[mCurState = Trackleting], iteration, evalLog, iteration, iVertex); nTracklets += mTraits->getTFNumberOfTracklets(); @@ -91,24 +92,18 @@ void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& er logger(std::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); logger(std::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); total += timeTracklets + timeCells + timeNeighbours + timeRoads; - if (mTrkParams[iteration].PrintMemory) { - mMemoryPool->print(); - } - } - if constexpr (constants::DoTimeBenchmarks) { - logger(std::format("=== TimeSlice {} processing completed in: {:.2f} ms using {} thread(s) ===", mTimeSlice, total, mTraits->getNThreads())); } } catch (const BoundedMemoryResource::MemoryLimitExceeded& err) { handleException(err); - return; + return -1.f; } catch (const std::bad_alloc& err) { handleException(err); - return; + return -1.f; } catch (const std::exception& err) { error(std::format("Uncaught exception, all bets are off... {}", err.what())); // clear tracks explicitly since if not fatalising on exception this may contain partial output mTimeFrame->getTracks().clear(); - return; + return -1.f; } if (mTimeFrame->hasMCinformation()) { @@ -120,10 +115,7 @@ void Tracker::clustersToTracks(const LogFunc& logger, const LogFunc& er ++mTimeFrameCounter; mTotalTime += total; - if (mTrkParams[0].PrintMemory) { - mTimeFrame->printArtefactsMemory(); - mMemoryPool->print(); - } + return total; } template diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index fcd9024a74709..427831b1ed484 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -188,7 +188,7 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) mTimeFrame->getROFMaskView().print(iLayer); } - float vertexerElapsedTime{0.f}; + float vertexerElapsedTime{0.f}, trackerElapsedTime{0.f}; if (mRunVertexer) { // Run seeding vertexer vertexerElapsedTime = mVertexer->clustersToVertices(logger); @@ -232,26 +232,29 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) } if (mRunVertexer && hasClusters) { - LOG(info) << fmt::format(" - Vertex seeding total elapsed time: {} ms for {} vertices found", - vertexerElapsedTime, - mTimeFrame->getPrimaryVerticesNum()); + LOGP(info, " + Vertex seeding total elapsed time: {} ms for {} vertices found", vertexerElapsedTime, mTimeFrame->getPrimaryVerticesNum()); } if (mOverrideBeamEstimation) { - LOG(info) << fmt::format(" - Beam position set to: {}, {} from meanvertex object", mTimeFrame->getBeamX(), mTimeFrame->getBeamY()); + LOG(info) << fmt::format(" + Beam position set to: {}, {} from meanvertex object", mTimeFrame->getBeamX(), mTimeFrame->getBeamY()); } else { - LOG(info) << fmt::format(" - Beam position computed for the TF: {}, {}", mTimeFrame->getBeamX(), mTimeFrame->getBeamY()); + LOG(info) << fmt::format(" + Beam position computed for the TF: {}, {}", mTimeFrame->getBeamX(), mTimeFrame->getBeamY()); } if (hasClusters) { mTimeFrame->setMultiplicityCutMask(processMultiplictyMask); mTimeFrame->setUPCCutMask(processUPCMask); - // Run CA tracker if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { - mTracker->clustersToTracks(logger, fatalLogger); + trackerElapsedTime = mTracker->clustersToTracks(logger, fatalLogger); } else { - mTracker->clustersToTracks(logger, errorLogger); + trackerElapsedTime = mTracker->clustersToTracks(logger, errorLogger); } + LOGP(info, " + Tracking total elapse time: {} ms for {} tracks found", trackerElapsedTime, mTimeFrame->getNumberOfTracks()); + } + if constexpr (constants::DoTimeBenchmarks) { + const auto& trackConf = o2::its::TrackerParamConfig::Instance(); + const auto& vertConf = o2::its::VertexerParamConfig::Instance(); + logger(std::format("=== TimeSlice {} processing completed in: {:.2f} ms using {}/{} thread(s) ===", tfInfo.timeslice, trackerElapsedTime + vertexerElapsedTime, vertConf.nThreads, trackConf.nThreads)); } size_t totTracks{mTimeFrame->getNumberOfTracks()}, totClusIDs{mTimeFrame->getNumberOfUsedClusters()}; @@ -260,7 +263,7 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) allClusIdx.reserve(totClusIDs); if (mTimeFrame->hasBogusClusters()) { - LOG(warning) << fmt::format(" - The processed timeframe had {} clusters with wild z coordinates, check the dictionaries", mTimeFrame->hasBogusClusters()); + LOG(warning) << fmt::format(" + The processed timeframe had {} clusters with wild z coordinates, check the dictionaries", mTimeFrame->hasBogusClusters()); } auto& tracks = mTimeFrame->getTracks(); @@ -344,7 +347,7 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) } } - LOGP(info, "ITSTracker pushed {} tracks in {} rofs and {} vertices {}", allTracks.size(), allTrackROFs.size(), vertices.size(), ((mDoStaggering) ? "in staggered-readout mode" : "in normal mode")); + LOGP(info, "ITSTracker pushed {} tracks in {} rofs and {} vertices {}", allTracks.size(), allTrackROFs.size(), vertices.size(), ((mDoStaggering) ? "in staggered-readout mode" : "")); if (mIsMC) { LOGP(info, "ITSTracker pushed {} track labels", allTrackLabels.size()); LOGP(info, "ITSTracker pushed {} vertex labels", allVerticesLabels.size()); diff --git a/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx b/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx index cbff174634ec8..556302cb2854f 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx @@ -141,13 +141,9 @@ void Vertexer::printEpilog(LogFunc& logger, const unsigned selectedN, const unsigned int vertexN, const unsigned int totalVertexN, const float trackletT, const float selecT, const float vertexT) { - logger(fmt::format(" - {} Vertexer: found {} | {} tracklets in: {} ms", mTraits->getName(), trackletN01, trackletN12, trackletT)); - logger(fmt::format(" - {} Vertexer: selected {} tracklets in: {} ms", mTraits->getName(), selectedN, selecT)); - logger(fmt::format(" - {} Vertexer: found {} vertices in: {} ms (total: {})", mTraits->getName(), vertexN, vertexT, totalVertexN)); - if (mVertParams[0].PrintMemory) { - mTimeFrame->printArtefactsMemory(); - mMemoryPool->print(); - } + logger(fmt::format(" - {} Vertexer: found {} | {} tracklets in: {:.2f} ms", mTraits->getName(), trackletN01, trackletN12, trackletT)); + logger(fmt::format(" - {} Vertexer: selected {} tracklets in: {:.2f} ms", mTraits->getName(), selectedN, selecT)); + logger(fmt::format(" - {} Vertexer: found {} vertices (total: {}) in: {:.2f} ms", mTraits->getName(), vertexN, totalVertexN, vertexT)); } template class Vertexer<7>; From a201ecd0204f635570152c132ea4e9af665699af Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 17 Apr 2026 21:29:17 +0200 Subject: [PATCH 2/4] ITS: clear tracklets after cell finding Signed-off-by: Felix Schlepper --- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 72 ++++++++++++------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 3cf462206bf94..cf4b169d255c2 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -33,8 +34,6 @@ #include "ITStracking/Tracklet.h" #include "ReconstructionDataFormats/Track.h" -using o2::base::PropagatorF; - namespace o2::its { @@ -207,7 +206,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer tbb::parallel_for(0, mTrkParams[iteration].TrackletsPerRoad(), [&](const int iLayer) { /// Sort tracklets auto& trkl{mTimeFrame->getTracklets()[iLayer]}; - tbb::parallel_sort(trkl.begin(), trkl.end(), [](const Tracklet& a, const Tracklet& b) -> bool { + std::sort(trkl.begin(), trkl.end(), [](const Tracklet& a, const Tracklet& b) -> bool { if (a.firstClusterIndex != b.firstClusterIndex) { return a.firstClusterIndex < b.firstClusterIndex; } @@ -346,10 +345,14 @@ void TrackerTraits::computeLayerCells(const int iteration) return foundCells; }; - tbb::parallel_for(0, mTrkParams[iteration].CellsPerRoad(), [&](const int iLayer) { + for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) { if (mTimeFrame->getTracklets()[iLayer + 1].empty() || mTimeFrame->getTracklets()[iLayer].empty()) { - return; + if (iLayer < mTrkParams[iteration].TrackletsPerRoad()) { + deepVectorClear(mTimeFrame->getTracklets()[iLayer]); + deepVectorClear(mTimeFrame->getTrackletsLabel(iLayer)); + } + continue; } auto& layerCells = mTimeFrame->getCells()[iLayer]; @@ -368,7 +371,14 @@ void TrackerTraits::computeLayerCells(const int iteration) std::exclusive_scan(perTrackletCount.begin(), perTrackletCount.end(), perTrackletCount.begin(), 0); auto totalCells{perTrackletCount.back()}; if (totalCells == 0) { - return; + if (iLayer > 0) { + auto& lut = mTimeFrame->getCellsLookupTable()[iLayer - 1]; + lut.resize(currentLayerTrackletsNum + 1); + std::fill(lut.begin(), lut.end(), 0); + } + deepVectorClear(mTimeFrame->getTracklets()[iLayer]); + deepVectorClear(mTimeFrame->getTrackletsLabel(iLayer)); + continue; } layerCells.resize(totalCells); @@ -386,20 +396,28 @@ void TrackerTraits::computeLayerCells(const int iteration) lut.resize(currentLayerTrackletsNum + 1); std::copy_n(perTrackletCount.begin(), currentLayerTrackletsNum + 1, lut.begin()); } - }); - /// Create cells labels - if (mTimeFrame->hasMCinformation() && mTrkParams[iteration].createArtefactLabels) { - tbb::parallel_for(0, mTrkParams[iteration].CellsPerRoad(), [&](const int iLayer) { - mTimeFrame->getCellsLabel(iLayer).reserve(mTimeFrame->getCells()[iLayer].size()); - for (const auto& cell : mTimeFrame->getCells()[iLayer]) { + if (mTimeFrame->hasMCinformation() && mTrkParams[iteration].createArtefactLabels) { + auto& labels = mTimeFrame->getCellsLabel(iLayer); + labels.reserve(layerCells.size()); + for (const auto& cell : layerCells) { MCCompLabel currentLab{mTimeFrame->getTrackletsLabel(iLayer)[cell.getFirstTrackletIndex()]}; MCCompLabel nextLab{mTimeFrame->getTrackletsLabel(iLayer + 1)[cell.getSecondTrackletIndex()]}; - mTimeFrame->getCellsLabel(iLayer).emplace_back(currentLab == nextLab ? currentLab : MCCompLabel()); + labels.emplace_back(currentLab == nextLab ? currentLab : MCCompLabel()); } - }); + } + + // Once layer i cells are built and labelled, the corresponding tracklet artefacts are no longer needed. + deepVectorClear(mTimeFrame->getTracklets()[iLayer]); + deepVectorClear(mTimeFrame->getTrackletsLabel(iLayer)); } }); + + // Clear the trailing tracklet artefacts that are not consumed as the first leg of a cell. + for (int iLayer = mTrkParams[iteration].CellsPerRoad(); iLayer < mTrkParams[iteration].TrackletsPerRoad(); ++iLayer) { + deepVectorClear(mTimeFrame->getTracklets()[iLayer]); + deepVectorClear(mTimeFrame->getTrackletsLabel(iLayer)); + } } template @@ -509,6 +527,9 @@ void TrackerTraits::findCellsNeighbours(const int iteration) } mTimeFrame->getCells()[iLayer + 1][cellIdx].setLevel(maxLvl); } + + // clear cells LUT + deepVectorClear(mTimeFrame->getCellsLookupTable()[iLayer]); } }); } @@ -752,11 +773,13 @@ void TrackerTraits::findRoads(const int iteration) } deepVectorClear(trackSeeds); - tbb::parallel_sort(tracks.begin(), tracks.end(), [](const auto& a, const auto& b) { - return a.getChi2() < b.getChi2(); - }); }); + std::sort(tracks.begin(), tracks.end(), [](const auto& a, const auto& b) { + return a.getChi2() < b.getChi2(); + }); + + mTimeFrame->getTracks().reserve(mTimeFrame->getTracks().size() + tracks.size()); const float smallestROFHalf = mTimeFrame->getROFOverlapTableView().getClockLayer().mROFLength * 0.5f; for (auto& track : tracks) { int nShared = 0; @@ -803,7 +826,6 @@ void TrackerTraits::findRoads(const int iteration) if (track.getTimeStamp().getTimeStampError() > smallestROFHalf) { track.getTimeStamp().setTimeStampError(smallestROFHalf); } - track.setUserField(0); track.getParamOut().setUserField(0); mTimeFrame->getTracks().emplace_back(track); @@ -885,7 +907,7 @@ bool TrackerTraits::fitTrack(TrackITSExt& track, int start, int end, in } nCl++; } - return std::abs(track.getQ2Pt()) < maxQoverPt && track.getChi2() < chi2ndfcut * (nCl * 2 - 5); + return std::abs(track.getQ2Pt()) < maxQoverPt && track.getChi2() < chi2ndfcut * (float)((nCl * 2) - 5); } // create a new seed either from the existing track inner param or reseed from the edgepointd and cluster in the middle @@ -936,17 +958,17 @@ track::TrackParCov TrackerTraits::buildTrackSeed(const Cluster& cluster { const float sign = reverse ? -1.f : 1.f; - float ca, sa; + float ca = NAN, sa = NAN; o2::gpu::CAMath::SinCos(tf3.alphaTrackingFrame, sa, ca); - const float x1 = cluster1.xCoordinate * ca + cluster1.yCoordinate * sa; - const float y1 = -cluster1.xCoordinate * sa + cluster1.yCoordinate * ca; - const float x2 = cluster2.xCoordinate * ca + cluster2.yCoordinate * sa; - const float y2 = -cluster2.xCoordinate * sa + cluster2.yCoordinate * ca; + const float x1 = (cluster1.xCoordinate * ca) + (cluster1.yCoordinate * sa); + const float y1 = (-cluster1.xCoordinate * sa) + (cluster1.yCoordinate * ca); + const float x2 = (cluster2.xCoordinate * ca) + (cluster2.yCoordinate * sa); + const float y2 = (-cluster2.xCoordinate * sa) + (cluster2.yCoordinate * ca); const float x3 = tf3.xTrackingFrame; const float y3 = tf3.positionTrackingFrame[0]; - float snp, q2pt, q2pt2; + float snp = NAN, q2pt = NAN, q2pt2 = NAN; if (mIsZeroField) { const float dx = x3 - x1; const float dy = y3 - y1; From 7fde2e3841bc3f2c7dbe57bb325566c777cb7fe5 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 18 Apr 2026 09:37:03 +0200 Subject: [PATCH 3/4] ITS: separate into cell and track seed class Signed-off-by: Felix Schlepper --- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 16 +-- .../GPU/ITStrackingGPU/TrackerTraitsGPU.h | 2 +- .../GPU/ITStrackingGPU/TrackingKernels.h | 31 ++-- .../ITS/tracking/GPU/cuda/TimeFrameGPU.cu | 30 ++-- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 2 +- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 112 +++++++-------- .../include/ITStracking/BoundedAllocator.h | 2 +- .../ITS/tracking/include/ITStracking/Cell.h | 132 +++++++++++++----- .../tracking/include/ITStracking/Constants.h | 14 +- .../tracking/include/ITStracking/TimeFrame.h | 4 +- .../include/ITStracking/TrackerTraits.h | 8 +- .../ITSMFT/ITS/tracking/src/TimeFrame.cxx | 2 +- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 20 +-- 13 files changed, 219 insertions(+), 156 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index cf1295e08bd76..c87b3d36b9a6a 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -27,11 +27,11 @@ namespace o2::its::gpu template class TimeFrameGPU final : public TimeFrame { - using typename TimeFrame::CellSeedN; using typename TimeFrame::IndexTableUtilsN; using typename TimeFrame::ROFOverlapTableN; using typename TimeFrame::ROFVertexLookupTableN; using typename TimeFrame::ROFMaskTableN; + using typename TimeFrame::TrackSeedN; public: TimeFrameGPU() = default; @@ -72,7 +72,7 @@ class TimeFrameGPU final : public TimeFrame void loadCellsLUTDevice(); void loadTrackSeedsDevice(); void loadTrackSeedsChi2Device(); - void loadTrackSeedsDevice(bounded_vector&); + void loadTrackSeedsDevice(bounded_vector&); void createTrackletsBuffers(const int); void createTrackletsBuffersArray(const int); void createCellsBuffers(const int); @@ -136,8 +136,8 @@ class TimeFrameGPU final : public TimeFrame int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; } int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; } int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; } - CellSeedN** getDeviceArrayCells() { return mCellsDeviceArray; } - CellSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; } + CellSeed** getDeviceArrayCells() { return mCellsDeviceArray; } + TrackSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; } int* getDeviceTrackSeedsLUT() { return mTrackSeedsLUTDevice; } auto getNTrackSeeds() const { return mNTracks; } o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; } @@ -157,7 +157,7 @@ class TimeFrameGPU final : public TimeFrame gsl::span getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; } gsl::span getDeviceCellLUTs() { return mCellsLUTDevice; } gsl::span getDeviceTracklets() { return mTrackletsDevice; } - gsl::span getDeviceCells() { return mCellsDevice; } + gsl::span getDeviceCells() { return mCellsDevice; } // Overridden getters size_t getNumberOfTracklets() const final; @@ -203,10 +203,10 @@ class TimeFrameGPU final : public TimeFrame int** mNeighboursCellDeviceArray{nullptr}; int** mNeighboursCellLUTDeviceArray{nullptr}; int** mTrackletsLUTDeviceArray{nullptr}; - std::array mCellsDevice; - CellSeedN** mCellsDeviceArray; + std::array mCellsDevice; + CellSeed** mCellsDeviceArray; std::array mNeighboursIndexTablesDevice; - CellSeedN* mTrackSeedsDevice{nullptr}; + TrackSeedN* mTrackSeedsDevice{nullptr}; int* mTrackSeedsLUTDevice{nullptr}; unsigned int mNTracks{0}; std::array mCellSeedsDevice; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h index 38d2a8ad5ddc2..81d870c5b46c2 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h @@ -19,7 +19,7 @@ namespace o2::its { -template +template class TrackerTraitsGPU final : public TrackerTraits { using typename TrackerTraits::IndexTableUtilsN; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index 6a977f8fef21a..2ee70f447a086 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -24,8 +24,9 @@ namespace o2::its { -template class CellSeed; +template +class TrackSeed; class TrackingFrameInfo; class Tracklet; template @@ -34,7 +35,7 @@ class Cluster; class TrackITSExt; class ExternalAllocator; -template +template void countTrackletsInROFsHandler(const IndexTableUtils* utils, const typename ROFMaskTable::View& rofMask, const int layer, @@ -62,7 +63,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); -template +template void computeTrackletsInROFsHandler(const IndexTableUtils* utils, const typename ROFMaskTable::View& rofMask, const int layer, @@ -101,7 +102,7 @@ void countCellsHandler(const Cluster** sortedClusters, int** trackletsLUT, const int nTracklets, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTsDeviceArray, int* cellsLUTsHost, const float bz, @@ -119,7 +120,7 @@ void computeCellsHandler(const Cluster** sortedClusters, int** trackletsLUT, const int nTracklets, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTsDeviceArray, int* cellsLUTsHost, const float bz, @@ -129,7 +130,7 @@ void computeCellsHandler(const Cluster** sortedClusters, gpu::Streams& streams); template -void countCellNeighboursHandler(CellSeed** cellsLayersDevice, +void countCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUTs, int** cellsLUTs, gpuPair* cellNeighbours, @@ -145,7 +146,7 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice, gpu::Stream& stream); template -void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, +void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUTs, int** cellsLUTs, gpuPair* cellNeighbours, @@ -165,17 +166,17 @@ int filterCellNeighboursHandler(gpuPair*, gpu::Stream&, o2::its::ExternalAllocator* = nullptr); -template +template void processNeighboursHandler(const int startLayer, const int startLevel, - CellSeed** allCellSeeds, - CellSeed* currentCellSeeds, + CellSeed** allCellSeeds, + CellSeed* currentCellSeeds, std::array& nCells, const unsigned char** usedClusters, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - bounded_vector>& seedsHost, + bounded_vector>& seedsHost, const float bz, const float MaxChi2ClusterAttachment, const float maxChi2NDF, @@ -183,8 +184,8 @@ void processNeighboursHandler(const int startLayer, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator* alloc); -template -void countTrackSeedHandler(CellSeed* trackSeeds, +template +void countTrackSeedHandler(TrackSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, const Cluster** unsortedClusters, int* seedLUT, @@ -202,8 +203,8 @@ void countTrackSeedHandler(CellSeed* trackSeeds, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator* alloc); -template -void computeTrackSeedHandler(CellSeed* trackSeeds, +template +void computeTrackSeedHandler(TrackSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, const Cluster** unsortedClusters, o2::its::TrackITSExt* tracks, diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu index bd5e7a8bc59f8..b9091eebde377 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu @@ -433,11 +433,11 @@ void TimeFrameGPU::loadCellsDevice() { GPUTimer timer(mGpuStreams, "loading cell seeds", NLayers - 2); for (auto iLayer{0}; iLayer < NLayers - 2; ++iLayer) { - GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeedN) / constants::MB); - allocMemAsync(reinterpret_cast(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeedN), mGpuStreams[iLayer], this->hasFrameworkAllocator()); + GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeed) / constants::MB); + allocMemAsync(reinterpret_cast(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeed), mGpuStreams[iLayer], this->hasFrameworkAllocator()); allocMemAsync(reinterpret_cast(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->hasFrameworkAllocator()); // accessory for the neigh. finding. GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get())); - GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeedN), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); + GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeed), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); } } @@ -465,8 +465,8 @@ void TimeFrameGPU::createCellsBuffersArray(const int iteration) { if (!iteration) { GPUTimer timer("creating cells buffers array"); - allocMem(reinterpret_cast(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeedN*), this->hasFrameworkAllocator()); - GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeedN*), cudaMemcpyHostToDevice)); + allocMem(reinterpret_cast(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeed*), this->hasFrameworkAllocator()); + GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice)); } } @@ -477,10 +477,10 @@ void TimeFrameGPU::createCellsBuffers(const int layer) mNCells[layer] = 0; GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); mGpuStreams[layer].sync(); // ensure number of cells is correct - GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeedN) / constants::MB); - allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); - GPUChkErrS(cudaMemsetAsync(mCellsDevice[layer], 0, mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer].get())); - GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeedN*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); + GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB); + allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); + GPUChkErrS(cudaMemsetAsync(mCellsDevice[layer], 0, mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer].get())); + GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeed*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } template @@ -495,12 +495,12 @@ void TimeFrameGPU::loadCellsLUTDevice() } template -void TimeFrameGPU::loadTrackSeedsDevice(bounded_vector& seeds) +void TimeFrameGPU::loadTrackSeedsDevice(bounded_vector& seeds) { GPUTimer timer("loading track seeds"); - GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeedN) / constants::MB); - allocMem(reinterpret_cast(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeedN), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); - GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeedN), cudaMemcpyHostToDevice)); + GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(TrackSeedN) / constants::MB); + allocMem(reinterpret_cast(&mTrackSeedsDevice), seeds.size() * sizeof(TrackSeedN), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); + GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(TrackSeedN), cudaMemcpyHostToDevice)); GPULog("gpu-transfer: creating {} track seeds LUT, for {:.2f} MB.", seeds.size() + 1, (seeds.size() + 1) * sizeof(int) / constants::MB); allocMem(reinterpret_cast(&mTrackSeedsLUTDevice), (seeds.size() + 1) * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); GPUChkErrS(cudaMemset(mTrackSeedsLUTDevice, 0, (seeds.size() + 1) * sizeof(int))); @@ -537,9 +537,9 @@ void TimeFrameGPU::downloadCellsDevice() { GPUTimer timer(mGpuStreams, "downloading cells", NLayers - 2); for (int iLayer{0}; iLayer < NLayers - 2; ++iLayer) { - GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeedN) / constants::MB); + GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / constants::MB); this->mCells[iLayer].resize(mNCells[iLayer]); - GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeedN), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get())); + GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeed), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get())); } } diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index f7a416808fec7..4a858dd7222e3 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -265,7 +265,7 @@ void TrackerTraitsGPU::findRoads(const int iteration) { for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) { const int minimumLayer{startLevel - 1}; - bounded_vector> trackSeeds(this->getMemoryPool().get()); + bounded_vector> trackSeeds(this->getMemoryPool().get()); for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) { if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 54f92411a3df1..adeba0b684e21 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -163,7 +163,7 @@ GPUdii() o2::track::TrackParCov buildTrackSeed(const Cluster& cluster1, } template -GPUdii() TrackITSExt seedTrackForRefit(const CellSeed& seed, +GPUdii() TrackITSExt seedTrackForRefit(const TrackSeed& seed, const TrackingFrameInfo** foundTrackingFrameInfo, const Cluster** unsortedClusters, const float* layerRadii, @@ -265,7 +265,7 @@ struct seed_selector { float maxChi2; GPUhd() seed_selector(float maxQ2Pt, float maxChi2) : maxQ2Pt(maxQ2Pt), maxChi2(maxChi2) {} - GPUhd() bool operator()(const CellSeed& seed) const + GPUhd() bool operator()(const TrackSeed& seed) const { return !(seed.getQ2Pt() > maxQ2Pt || seed.getChi2() > maxChi2); } @@ -280,7 +280,7 @@ struct compare_track_chi2 { template GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel( - CellSeed* trackSeeds, + TrackSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, const Cluster** unsortedClusters, o2::its::TrackITSExt* tracks, @@ -384,9 +384,9 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel( } } -template +template GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel( - CellSeed** cellSeedArray, + CellSeed** cellSeedArray, int* neighboursLUT, int* neighboursIndexTable, int** cellsLUTs, @@ -449,7 +449,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( int** trackletsLUT, const int nTrackletsCurrent, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTs, const float bz, const float maxChi2ClusterAttachment, @@ -522,7 +522,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( if constexpr (!initRun) { TimeEstBC ts = currentTracklet.getTimeStamp(); ts += nextTracklet.getTimeStamp(); - new (cells + cellsLUTs[layer][iCurrentTrackletIndex] + foundCells) CellSeed{layer, clusId[0], clusId[1], clusId[2], iCurrentTrackletIndex, iNextTrackletIndex, track, chi2, ts}; + new (cells + cellsLUTs[layer][iCurrentTrackletIndex] + foundCells) CellSeed{layer, clusId[0], clusId[1], clusId[2], iCurrentTrackletIndex, iNextTrackletIndex, track, chi2, ts}; } ++foundCells; } @@ -687,15 +687,15 @@ GPUg() void __launch_bounds__(256, 1) compileTrackletsLookupTableKernel( } } -template +template GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( const int layer, const int level, - CellSeed** allCellSeeds, - CellSeed* currentCellSeeds, + CellSeed** allCellSeeds, + CurrentSeed* currentCellSeeds, const int* currentCellIds, const unsigned int nCurrentCells, - CellSeed* updatedCellSeeds, + TrackSeed* updatedCellSeeds, int* updatedCellsIds, int* foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration const unsigned char** usedClusters, // Used clusters @@ -745,7 +745,7 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( if (usedClusters[layer - 1][neighbourCell.getFirstClusterIndex()]) { continue; } - auto seed{currentCell}; + TrackSeed seed{currentCell}; auto& trHit = foundTrackingFrameInfo[layer - 1][neighbourCell.getFirstClusterIndex()]; if (!seed.rotate(trHit.alphaTrackingFrame)) { @@ -922,7 +922,7 @@ void countCellsHandler( int** trackletsLUT, const int nTracklets, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, const float bz, @@ -932,7 +932,7 @@ void countCellsHandler( o2::its::ExternalAllocator* alloc, gpu::Streams& streams) { - gpu::computeLayerCellsKernel<<<60, 256, 0, streams[layer].get()>>>( + gpu::computeLayerCellsKernel<<<60, 256, 0, streams[layer].get()>>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -959,7 +959,7 @@ void computeCellsHandler( int** trackletsLUT, const int nTracklets, const int layer, - CellSeed* cells, + CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, const float bz, @@ -968,7 +968,7 @@ void computeCellsHandler( const float nSigmaCut, gpu::Streams& streams) { - gpu::computeLayerCellsKernel<<<60, 256, 0, streams[layer].get()>>>( + gpu::computeLayerCellsKernel<<<60, 256, 0, streams[layer].get()>>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -985,7 +985,7 @@ void computeCellsHandler( } template -void countCellNeighboursHandler(CellSeed** cellsLayersDevice, +void countCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUT, int** cellsLUTs, gpuPair* cellNeighbours, @@ -1000,7 +1000,7 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice, o2::its::ExternalAllocator* alloc, gpu::Stream& stream) { - gpu::computeLayerCellNeighboursKernel<<<60, 256, 0, stream.get()>>>( + gpu::computeLayerCellNeighboursKernel<<<60, 256, 0, stream.get()>>>( cellsLayersDevice, neighboursLUT, neighboursIndexTable, @@ -1018,7 +1018,7 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice, } template -void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, +void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUT, int** cellsLUTs, gpuPair* cellNeighbours, @@ -1032,7 +1032,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, const int maxCellNeighbours, gpu::Stream& stream) { - gpu::computeLayerCellNeighboursKernel<<<60, 256, 0, stream.get()>>>( + gpu::computeLayerCellNeighboursKernel<<<60, 256, 0, stream.get()>>>( cellsLayersDevice, neighboursLUT, neighboursIndexTable, @@ -1065,14 +1065,14 @@ int filterCellNeighboursHandler(gpuPair* cellNeighbourPairs, template void processNeighboursHandler(const int startLayer, const int startLevel, - CellSeed** allCellSeeds, - CellSeed* currentCellSeeds, + CellSeed** allCellSeeds, + CellSeed* currentCellSeeds, std::array& nCells, const unsigned char** usedClusters, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - bounded_vector>& seedsHost, + bounded_vector>& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, @@ -1083,11 +1083,11 @@ void processNeighboursHandler(const int startLayer, constexpr uint64_t Tag = qStr2Tag("ITS_PNH1"); alloc->pushTagOnStack(Tag); auto allocInt = gpu::TypedAllocator(alloc); - auto allocCellSeed = gpu::TypedAllocator>(alloc); + auto allocTrackSeed = gpu::TypedAllocator>(alloc); thrust::device_vector> foundSeedsTable(nCells[startLayer] + 1, 0, allocInt); auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(gpu::Stream::DefaultStream); - gpu::processNeighboursKernel<<<60, 256>>>( + gpu::processNeighboursKernel<<<60, 256>>>( startLayer, startLevel, allCellSeeds, @@ -1108,8 +1108,8 @@ void processNeighboursHandler(const int startLayer, thrust::exclusive_scan(nosync_policy, foundSeedsTable.begin(), foundSeedsTable.end(), foundSeedsTable.begin()); thrust::device_vector> updatedCellId(foundSeedsTable.back(), 0, allocInt); - thrust::device_vector, gpu::TypedAllocator>> updatedCellSeed(foundSeedsTable.back(), allocCellSeed); - gpu::processNeighboursKernel<<<60, 256>>>( + thrust::device_vector, gpu::TypedAllocator>> updatedCellSeed(foundSeedsTable.back(), allocTrackSeed); + gpu::processNeighboursKernel<<<60, 256>>>( startLayer, startLevel, allCellSeeds, @@ -1131,17 +1131,17 @@ void processNeighboursHandler(const int startLayer, int level = startLevel; thrust::device_vector> lastCellId(allocInt); - thrust::device_vector, gpu::TypedAllocator>> lastCellSeed(allocCellSeed); + thrust::device_vector, gpu::TypedAllocator>> lastCellSeed(allocTrackSeed); for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); - thrust::device_vector, gpu::TypedAllocator>>(allocCellSeed).swap(updatedCellSeed); + thrust::device_vector, gpu::TypedAllocator>>(allocTrackSeed).swap(updatedCellSeed); thrust::device_vector>(allocInt).swap(updatedCellId); auto lastCellSeedSize{lastCellSeed.size()}; foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(nosync_policy, foundSeedsTable.begin(), foundSeedsTable.end(), 0); - gpu::processNeighboursKernel<<<60, 256>>>( + gpu::processNeighboursKernel><<<60, 256>>>( iLayer, --level, allCellSeeds, @@ -1165,9 +1165,9 @@ void processNeighboursHandler(const int startLayer, updatedCellId.resize(foundSeeds); thrust::fill(nosync_policy, updatedCellId.begin(), updatedCellId.end(), 0); updatedCellSeed.resize(foundSeeds); - thrust::fill(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); + thrust::fill(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), TrackSeed()); - gpu::processNeighboursKernel<<<60, 256>>>( + gpu::processNeighboursKernel><<<60, 256>>>( iLayer, level, allCellSeeds, @@ -1187,7 +1187,7 @@ void processNeighboursHandler(const int startLayer, matCorrType); } GPUChkErrS(cudaStreamSynchronize(gpu::Stream::DefaultStream)); - thrust::device_vector, gpu::TypedAllocator>> outSeeds(updatedCellSeed.size(), allocCellSeed); + thrust::device_vector, gpu::TypedAllocator>> outSeeds(updatedCellSeed.size(), allocTrackSeed); auto end = thrust::copy_if(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); auto s{end - outSeeds.begin()}; seedsHost.reserve(seedsHost.size() + s); @@ -1196,7 +1196,7 @@ void processNeighboursHandler(const int startLayer, } template -void countTrackSeedHandler(CellSeed* trackSeeds, +void countTrackSeedHandler(TrackSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, const Cluster** unsortedClusters, int* seedLUT, @@ -1242,7 +1242,7 @@ void countTrackSeedHandler(CellSeed* trackSeeds, } template -void computeTrackSeedHandler(CellSeed* trackSeeds, +void computeTrackSeedHandler(TrackSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, const Cluster** unsortedClusters, o2::its::TrackITSExt* tracks, @@ -1352,7 +1352,7 @@ template void countCellsHandler<7>(const Cluster** sortedClusters, int** trackletsLUT, const int nTracklets, const int layer, - CellSeed<7>* cells, + CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, const float bz, @@ -1369,7 +1369,7 @@ template void computeCellsHandler<7>(const Cluster** sortedClusters, int** trackletsLUT, const int nTracklets, const int layer, - CellSeed<7>* cells, + CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, const float bz, @@ -1378,7 +1378,7 @@ template void computeCellsHandler<7>(const Cluster** sortedClusters, const float nSigmaCut, gpu::Streams& streams); -template void countCellNeighboursHandler<7>(CellSeed<7>** cellsLayersDevice, +template void countCellNeighboursHandler<7>(CellSeed** cellsLayersDevice, int* neighboursLUT, int** cellsLUTs, gpuPair* cellNeighbours, @@ -1393,30 +1393,30 @@ template void countCellNeighboursHandler<7>(CellSeed<7>** cellsLayersDevice, o2::its::ExternalAllocator* alloc, gpu::Stream& stream); -template void computeCellNeighboursHandler(CellSeed<7>** cellsLayersDevice, - int* neighboursLUT, - int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, - const float maxChi2ClusterAttachment, - const float bz, - const int layerIndex, - const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, - gpu::Stream& stream); +template void computeCellNeighboursHandler<7>(CellSeed** cellsLayersDevice, + int* neighboursLUT, + int** cellsLUTs, + gpuPair* cellNeighbours, + int* neighboursIndexTable, + const Tracklet** tracklets, + const float maxChi2ClusterAttachment, + const float bz, + const int layerIndex, + const unsigned int nCells, + const unsigned int nCellsNext, + const int maxCellNeighbours, + gpu::Stream& stream); template void processNeighboursHandler<7>(const int startLayer, const int startLevel, - CellSeed<7>** allCellSeeds, - CellSeed<7>* currentCellSeeds, + CellSeed** allCellSeeds, + CellSeed* currentCellSeeds, std::array& nCells, const unsigned char** usedClusters, std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, - bounded_vector>& seedsHost, + bounded_vector>& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, @@ -1424,7 +1424,7 @@ template void processNeighboursHandler<7>(const int startLayer, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator* alloc); -template void countTrackSeedHandler(CellSeed<7>* trackSeeds, +template void countTrackSeedHandler(TrackSeed<7>* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, const Cluster** unsortedClusters, int* seedLUT, @@ -1442,7 +1442,7 @@ template void countTrackSeedHandler(CellSeed<7>* trackSeeds, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator* alloc); -template void computeTrackSeedHandler(CellSeed<7>* trackSeeds, +template void computeTrackSeedHandler(TrackSeed<7>* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, const Cluster** unsortedClusters, o2::its::TrackITSExt* tracks, diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h index 01f5674fc611b..3a03e9d145907 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h @@ -25,7 +25,7 @@ #if !defined(__HIPCC__) && !defined(__CUDACC__) #include #include -#include "Framework/Logger.h" +#include "GPUCommonLogger.h" #endif #include "ITStracking/ExternalAllocator.h" #include "ITStracking/Constants.h" diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h index d223adcef6214..dce66bdf99415 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h @@ -24,30 +24,11 @@ namespace o2::its { -template -class CellSeed final : public o2::track::TrackParCovF +template +class SeedBase : public o2::track::TrackParCovF { public: - GPUhdDefault() CellSeed() = default; - GPUhd() CellSeed(int innerL, int cl0, int cl1, int cl2, int trkl0, int trkl1, o2::track::TrackParCovF& tpc, float chi2, const TimeEstBC& time) : o2::track::TrackParCovF(tpc), mChi2(chi2), mLevel(1), mTime(time) - { - mClusters.fill(constants::UnusedIndex); - setUserField(innerL); - mClusters[innerL + 0] = cl0; - mClusters[innerL + 1] = cl1; - mClusters[innerL + 2] = cl2; - mTracklets[0] = trkl0; - mTracklets[1] = trkl1; - } - GPUhdDefault() CellSeed(const CellSeed&) = default; - GPUhdDefault() ~CellSeed() = default; - // GPUhdDefault() CellSeed(CellSeed&&) = default; TODO cannot use this yet since TrackPar only has device - GPUhdDefault() CellSeed& operator=(const CellSeed&) = default; - GPUhdDefault() CellSeed& operator=(CellSeed&&) = default; - - GPUhd() int getFirstClusterIndex() const { return mClusters[getUserField()]; }; - GPUhd() int getSecondClusterIndex() const { return mClusters[getUserField() + 1]; }; - GPUhd() int getThirdClusterIndex() const { return mClusters[getUserField() + 2]; }; + GPUhd() int getInnerLayer() const { return getUserField(); } GPUhd() int getFirstTrackletIndex() const { return mTracklets[0]; }; GPUhd() void setFirstTrackletIndex(int trkl) { mTracklets[0] = trkl; }; GPUhd() int getSecondTrackletIndex() const { return mTracklets[1]; }; @@ -57,31 +38,108 @@ class CellSeed final : public o2::track::TrackParCovF GPUhd() int getLevel() const { return mLevel; }; GPUhd() void setLevel(int level) { mLevel = level; }; GPUhd() int* getLevelPtr() { return &mLevel; } - GPUhd() auto& getClusters() { return mClusters; } - GPUhd() int getCluster(int i) const { return mClusters[i]; } - GPUhd() void printCell() const - { - printf("cell: %d, %d\t lvl: %d\t chi2: %f\tcls: [", mTracklets[0], mTracklets[1], mLevel, mChi2); - for (int i = 0; i < NLayers; ++i) { - printf("%d", mClusters[i]); - if (i < NLayers - 1) { - printf(" | "); - } - } - printf("]"); - printf(" ts: %u +/- %u\n", mTime.getTimeStamp(), mTime.getTimeStampError()); - } GPUhd() auto& getTimeStamp() noexcept { return mTime; } GPUhd() const auto& getTimeStamp() const noexcept { return mTime; } + protected: + GPUhdDefault() SeedBase() = default; + GPUhdDefault() SeedBase(const SeedBase&) = default; + GPUhdDefault() ~SeedBase() = default; + GPUhdDefault() SeedBase(SeedBase&&) = default; + GPUhdDefault() SeedBase& operator=(const SeedBase&) = default; + GPUhdDefault() SeedBase& operator=(SeedBase&&) = default; + GPUhd() SeedBase(const o2::track::TrackParCovF& tpc, float chi2, int level, const TimeEstBC& time) + : o2::track::TrackParCovF(tpc), mChi2(chi2), mLevel(level), mTime(time) + { + } + GPUhd() auto& clustersRaw() { return mClusters; } + GPUhd() const auto& clustersRaw() const { return mClusters; } + private: float mChi2 = -999.f; int mLevel = constants::UnusedIndex; std::array mTracklets = constants::helpers::initArray(); - std::array mClusters = constants::helpers::initArray(); + std::array mClusters = constants::helpers::initArray(); TimeEstBC mTime; }; +/// CellSeed: connections of three clusters +class CellSeed final : public SeedBase<3> +{ + static constexpr int NStoredClusters = 3; + using Base = SeedBase; + + public: + GPUhdDefault() CellSeed() = default; + GPUhd() CellSeed(int innerL, int cl0, int cl1, int cl2, int trkl0, int trkl1, const o2::track::TrackParCovF& tpc, float chi2, const TimeEstBC& time) + : Base(tpc, chi2, 1, time) + { + setUserField(innerL); + auto& clusters = this->clustersRaw(); + clusters[0] = cl0; + clusters[1] = cl1; + clusters[2] = cl2; + setFirstTrackletIndex(trkl0); + setSecondTrackletIndex(trkl1); + } + GPUhdDefault() CellSeed(const CellSeed&) = default; + GPUhdDefault() ~CellSeed() = default; + GPUhdDefault() CellSeed(CellSeed&&) = default; + GPUhdDefault() CellSeed& operator=(const CellSeed&) = default; + GPUhdDefault() CellSeed& operator=(CellSeed&&) = default; + + GPUhd() int getFirstClusterIndex() const { return this->clustersRaw()[0]; }; + GPUhd() int getSecondClusterIndex() const { return this->clustersRaw()[1]; }; + GPUhd() int getThirdClusterIndex() const { return this->clustersRaw()[2]; }; + GPUhd() auto& getClusters() { return this->clustersRaw(); } + GPUhd() const auto& getClusters() const { return this->clustersRaw(); } + /// getCluster takes an ABSOLUTE layer index and returns UnusedIndex if the + /// layer is outside the 3 stored slots (innerL, innerL+1, innerL+2). + GPUhd() int getCluster(int layer) const + { + const int rel = layer - getInnerLayer(); + return (rel >= 0 && rel < NStoredClusters) ? this->clustersRaw()[rel] : constants::UnusedIndex; + } +}; + +/// TrackSeed: full-width working representation used during road finding. +/// processNeighbours extends the cluster list inward, so we need NLayers +/// absolute-indexed slots here. +template +class TrackSeed final : public SeedBase +{ + using Base = SeedBase; + + public: + GPUhdDefault() TrackSeed() = default; + GPUhd() TrackSeed(const CellSeed& cs) + : Base(static_cast(cs), cs.getChi2(), cs.getLevel(), cs.getTimeStamp()) + { + this->setUserField(cs.getInnerLayer()); + this->setFirstTrackletIndex(cs.getFirstTrackletIndex()); + this->setSecondTrackletIndex(cs.getSecondTrackletIndex()); + const int innerL = cs.getInnerLayer(); + auto& clusters = this->clustersRaw(); + clusters[innerL + 0] = cs.getFirstClusterIndex(); + clusters[innerL + 1] = cs.getSecondClusterIndex(); + clusters[innerL + 2] = cs.getThirdClusterIndex(); + } + GPUhdDefault() TrackSeed(const TrackSeed&) = default; + GPUhdDefault() ~TrackSeed() = default; + GPUhdDefault() TrackSeed(TrackSeed&&) = default; + GPUhdDefault() TrackSeed& operator=(const TrackSeed&) = default; + GPUhdDefault() TrackSeed& operator=(TrackSeed&&) = default; + + /// Three-cluster view of the original cell — note: innerL (UserField) is not + /// updated when processNeighbours extends the cluster list leftward. + GPUhd() int getFirstClusterIndex() const { return this->clustersRaw()[this->getUserField()]; } + GPUhd() int getSecondClusterIndex() const { return this->clustersRaw()[this->getUserField() + 1]; } + GPUhd() int getThirdClusterIndex() const { return this->clustersRaw()[this->getUserField() + 2]; } + GPUhd() auto& getClusters() { return this->clustersRaw(); } + GPUhd() const auto& getClusters() const { return this->clustersRaw(); } + GPUhd() int getCluster(int layer) const { return this->clustersRaw()[layer]; } +}; + } // namespace o2::its #endif /* TRACKINGITSU_INCLUDE_CACELL_H_ */ diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h index f8009e3ce8008..d48e8fb7c5856 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h @@ -30,14 +30,12 @@ constexpr float MB = KB * KB; constexpr float GB = MB * KB; constexpr bool DoTimeBenchmarks = true; constexpr bool SaveTimeBenchmarks = false; - -GPUconstexpr() float Tolerance = 1e-12; // numerical tolerance -GPUconstexpr() int ClustersPerCell = 3; -GPUconstexpr() int UnusedIndex = -1; -GPUconstexpr() float Resolution = 0.0005f; -GPUconstexpr() float Radl = 9.36f; // Radiation length of Si [cm] -GPUconstexpr() float Rho = 2.33f; // Density of Si [g/cm^3] -GPUconstexpr() int MaxIter = 4; // Max. supported iterations +constexpr float Tolerance = 1e-12; // numerical tolerance +constexpr int ClustersPerCell = 3; +constexpr int UnusedIndex = -1; +constexpr float Radl = 9.36f; // Radiation length of Si [cm] +constexpr float Rho = 2.33f; // Density of Si [g/cm^3] +constexpr int MaxIter = 4; // Max. supported iterations namespace helpers { diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index 3dd1b05cf8969..f2506694755c5 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -68,7 +68,7 @@ struct TimeFrame { using ROFOverlapTableN = ROFOverlapTable; using ROFVertexLookupTableN = ROFVertexLookupTable; using ROFMaskTableN = ROFMaskTable; - using CellSeedN = CellSeed; + using TrackSeedN = TrackSeed; friend class gpu::TimeFrameGPU; TimeFrame() = default; @@ -271,7 +271,7 @@ struct TimeFrame { std::array, NLayers> mUnsortedClusters; std::vector> mTracklets; - std::vector> mCells; + std::vector> mCells; bounded_vector mTracks; bounded_vector mTracksLabel; std::vector> mCellsNeighbours; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index fd3251a59d835..92c652ce9bcb1 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -43,7 +43,7 @@ class TrackerTraits { public: using IndexTableUtilsN = IndexTableUtils; - using CellSeedN = CellSeed; + using TrackSeedN = TrackSeed; virtual ~TrackerTraits() = default; virtual void adoptTimeFrame(TimeFrame* tf) { mTimeFrame = tf; } @@ -53,7 +53,9 @@ class TrackerTraits virtual void computeLayerCells(const int iteration); virtual void findCellsNeighbours(const int iteration); virtual void findRoads(const int iteration); - virtual void processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId); + + template + void processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId); void updateTrackingParameters(const std::vector& trkPars) { mTrkParams = trkPars; } TimeFrame* getTimeFrame() { return mTimeFrame; } @@ -82,7 +84,7 @@ class TrackerTraits private: track::TrackParCov buildTrackSeed(const Cluster& cluster1, const Cluster& cluster2, const TrackingFrameInfo& tf3, bool reverse = false); - TrackITSExt seedTrackForRefit(const CellSeedN& seed); + TrackITSExt seedTrackForRefit(const TrackSeedN& seed); bool fitTrack(TrackITSExt& track, int start, int end, int step, float chi2clcut = o2::constants::math::VeryBig, float chi2ndfcut = o2::constants::math::VeryBig, float maxQoverPt = o2::constants::math::VeryBig, int nCl = 0, o2::track::TrackPar* refLin = nullptr); std::shared_ptr mMemoryPool; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index 5b412ea4eea69..cafddfcc41a76 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -351,7 +351,7 @@ unsigned long TimeFrame::getArtefactsMemory() const size += sizeof(Tracklet) * trkl.size(); } for (const auto& cells : mCells) { - size += sizeof(CellSeedN) * cells.size(); + size += sizeof(CellSeed) * cells.size(); } for (const auto& cellsN : mCellsNeighbours) { size += sizeof(int) * cellsN.size(); diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index cf4b169d255c2..68e67ab3c6949 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -268,7 +268,7 @@ void TrackerTraits::computeLayerCells(const int iteration) } mTaskArena->execute([&] { - auto forTrackletCells = [&](auto Tag, int iLayer, bounded_vector& layerCells, int iTracklet, int offset = 0) -> int { + auto forTrackletCells = [&](auto Tag, int iLayer, bounded_vector& layerCells, int iTracklet, int offset = 0) -> int { const Tracklet& currentTracklet{mTimeFrame->getTracklets()[iLayer][iTracklet]}; const int nextLayerClusterIndex{currentTracklet.secondClusterIndex}; const int nextLayerFirstTrackletIndex{mTimeFrame->getTrackletsLookupTable()[iLayer][nextLayerClusterIndex]}; @@ -329,13 +329,12 @@ void TrackerTraits::computeLayerCells(const int iteration) TimeEstBC ts = currentTracklet.getTimeStamp(); ts += nextTracklet.getTimeStamp(); if constexpr (decltype(Tag)::value == PassMode::OnePass::value) { - // layerCells.emplace_back(iLayer, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2, ts); ++foundCells; } else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) { ++foundCells; } else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) { - layerCells[offset++] = CellSeedN(iLayer, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2, ts); + layerCells[offset++] = CellSeed(iLayer, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2, ts); } else { static_assert(false, "Unknown mode!"); } @@ -535,7 +534,8 @@ void TrackerTraits::findCellsNeighbours(const int iteration) } template -void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds) +template +void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds) { auto propagator = o2::base::Propagator::Instance(); @@ -575,7 +575,7 @@ void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bou } /// Let's start the fitting procedure - CellSeedN seed{currentCell}; + TrackSeedN seed{currentCell}; // CellSeed → TrackSeed explicit ctor expands clusters; TrackSeed → TrackSeed is copy seed.getTimeStamp() = currentCell.getTimeStamp(); seed.getTimeStamp() += neighbourCell.getTimeStamp(); const auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer - 1)[neighbourCell.getFirstClusterIndex()]; @@ -668,14 +668,14 @@ void TrackerTraits::findRoads(const int iteration) return seed.getQ2Pt() <= 1.e3 && seed.getChi2() <= mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5); }; - bounded_vector trackSeeds(mMemoryPool.get()); + bounded_vector trackSeeds(mMemoryPool.get()); for (int startLayer{mTrkParams[iteration].NeighboursPerRoad()}; startLayer >= startLevel - 1; --startLayer) { if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; } bounded_vector lastCellId(mMemoryPool.get()), updatedCellId(mMemoryPool.get()); - bounded_vector lastCellSeed(mMemoryPool.get()), updatedCellSeed(mMemoryPool.get()); + bounded_vector lastCellSeed(mMemoryPool.get()), updatedCellSeed(mMemoryPool.get()); processNeighbours(startLayer, startLevel, mTimeFrame->getCells()[startLayer], lastCellId, updatedCellSeed, updatedCellId); @@ -912,7 +912,7 @@ bool TrackerTraits::fitTrack(TrackITSExt& track, int start, int end, in // create a new seed either from the existing track inner param or reseed from the edgepointd and cluster in the middle template -TrackITSExt TrackerTraits::seedTrackForRefit(const CellSeedN& seed) +TrackITSExt TrackerTraits::seedTrackForRefit(const TrackSeedN& seed) { TrackITSExt temporaryTrack(seed); int lrMin = NLayers, lrMax = 0, lrMid = 0; @@ -1013,9 +1013,13 @@ void TrackerTraits::setNThreads(int n, std::shared_ptr } template class TrackerTraits<7>; +template void TrackerTraits<7>::processNeighbours(int, int, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&); +template void TrackerTraits<7>::processNeighbours>(int, int, const bounded_vector>&, const bounded_vector&, bounded_vector>&, bounded_vector&); // ALICE3 upgrade #ifdef ENABLE_UPGRADES template class TrackerTraits<11>; +template void TrackerTraits<11>::processNeighbours(int, int, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&); +template void TrackerTraits<11>::processNeighbours>(int, int, const bounded_vector>&, const bounded_vector&, bounded_vector>&, bounded_vector&); #endif } // namespace o2::its From 5cd39b1afb5f991b3c8142b85195c0730a843e8c Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sun, 19 Apr 2026 08:46:50 +0200 Subject: [PATCH 4/4] ITS: factor common functions out Signed-off-by: Felix Schlepper --- .../GPU/ITStrackingGPU/TrackingKernels.h | 5 + .../ITS/tracking/GPU/ITStrackingGPU/Utils.h | 21 -- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 84 ++--- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 333 +++++------------ .../include/ITStracking/IndexTableUtils.h | 28 +- .../include/ITStracking/TrackHelpers.h | 209 +++++++++++ .../include/ITStracking/TrackerTraits.h | 42 +-- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 353 ++++++++---------- .../ITS/tracking/src/TrackingInterface.cxx | 1 - 9 files changed, 520 insertions(+), 556 deletions(-) create mode 100644 Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index 2ee70f447a086..bf004426f9134 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -109,6 +109,7 @@ void countCellsHandler(const Cluster** sortedClusters, const float maxChi2ClusterAttachment, const float cellDeltaTanLambdaSigma, const float nSigmaCut, + const std::vector& layerxX0Host, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); @@ -127,6 +128,7 @@ void computeCellsHandler(const Cluster** sortedClusters, const float maxChi2ClusterAttachment, const float cellDeltaTanLambdaSigma, const float nSigmaCut, + const std::vector& layerxX0Host, gpu::Streams& streams); template @@ -180,6 +182,7 @@ void processNeighboursHandler(const int startLayer, const float bz, const float MaxChi2ClusterAttachment, const float maxChi2NDF, + const std::vector& layerxX0Host, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator* alloc); @@ -191,6 +194,7 @@ void countTrackSeedHandler(TrackSeed* trackSeeds, int* seedLUT, const std::vector& layerRadiiHost, const std::vector& minPtsHost, + const std::vector& layerxX0Host, const unsigned int nSeeds, const float Bz, const int startLevel, @@ -211,6 +215,7 @@ void computeTrackSeedHandler(TrackSeed* trackSeeds, const int* seedLUT, const std::vector& layerRadiiHost, const std::vector& minPtsHost, + const std::vector& layerxX0Host, const unsigned int nSeeds, const unsigned int nTracks, const float Bz, diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h index 44cd8d7e7492b..bcc20ace7bbc2 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h @@ -343,27 +343,6 @@ struct TypedAllocator { ExternalAllocator* mInternalAllocator; }; -template -GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex, - const o2::its::IndexTableUtils* utils, - const float z1, const float z2, float maxdeltaz, float maxdeltaphi) -{ - const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz; - const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi; - const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz; - const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi; - - if (zRangeMax < -utils->getLayerZ(layerIndex) || - zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) { - return {}; - } - - return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)), - utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)), - o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)), - utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; -} - GPUdii() gpuSpan getPrimaryVertices(const int rof, const int* roframesPV, const int nROF, diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 4a858dd7222e3..0359f2cfb0d03 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -16,6 +16,7 @@ #include "DataFormatsITS/TrackITS.h" +#include "ITStracking/TrackHelpers.h" #include "ITStrackingGPU/TrackerTraitsGPU.h" #include "ITStrackingGPU/TrackingKernels.h" #include "ITStracking/Constants.h" @@ -184,6 +185,7 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) this->mTrkParams[iteration].MaxChi2ClusterAttachment, this->mTrkParams[iteration].CellDeltaTanLambdaSigma, this->mTrkParams[iteration].NSigmaCut, + this->mTrkParams[iteration].LayerxX0, mTimeFrameGPU->getFrameworkAllocator(), mTimeFrameGPU->getStreams()); mTimeFrameGPU->createCellsBuffers(iLayer); @@ -204,6 +206,7 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) this->mTrkParams[iteration].MaxChi2ClusterAttachment, this->mTrkParams[iteration].CellDeltaTanLambdaSigma, this->mTrkParams[iteration].NSigmaCut, + this->mTrkParams[iteration].LayerxX0, mTimeFrameGPU->getStreams()); } } @@ -263,6 +266,10 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) template void TrackerTraitsGPU::findRoads(const int iteration) { + bounded_vector> firstClusters(this->mTrkParams[iteration].NLayers, bounded_vector(this->getMemoryPool().get()), this->getMemoryPool().get()); + bounded_vector> sharedFirstClusters(this->mTrkParams[iteration].NLayers, bounded_vector(this->getMemoryPool().get()), this->getMemoryPool().get()); + firstClusters.resize(this->mTrkParams[iteration].NLayers); + sharedFirstClusters.resize(this->mTrkParams[iteration].NLayers); for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) { const int minimumLayer{startLevel - 1}; bounded_vector> trackSeeds(this->getMemoryPool().get()); @@ -281,10 +288,11 @@ void TrackerTraitsGPU::findRoads(const int iteration) mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), trackSeeds, this->mBz, - this->mTrkParams[0].MaxChi2ClusterAttachment, - this->mTrkParams[0].MaxChi2NDF, + this->mTrkParams[iteration].MaxChi2ClusterAttachment, + this->mTrkParams[iteration].MaxChi2NDF, + this->mTrkParams[iteration].LayerxX0, mTimeFrameGPU->getDevicePropagator(), - this->mTrkParams[0].CorrType, + this->mTrkParams[iteration].CorrType, mTimeFrameGPU->getFrameworkAllocator()); } // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory. @@ -302,16 +310,17 @@ void TrackerTraitsGPU::findRoads(const int iteration) mTimeFrameGPU->getDeviceTrackSeedsLUT(), this->mTrkParams[iteration].LayerRadii, this->mTrkParams[iteration].MinPt, + this->mTrkParams[iteration].LayerxX0, trackSeeds.size(), this->mBz, startLevel, - this->mTrkParams[0].MaxChi2ClusterAttachment, - this->mTrkParams[0].MaxChi2NDF, - this->mTrkParams[0].ReseedIfShorter, - this->mTrkParams[0].RepeatRefitOut, - this->mTrkParams[0].ShiftRefToCluster, + this->mTrkParams[iteration].MaxChi2ClusterAttachment, + this->mTrkParams[iteration].MaxChi2NDF, + this->mTrkParams[iteration].ReseedIfShorter, + this->mTrkParams[iteration].RepeatRefitOut, + this->mTrkParams[iteration].ShiftRefToCluster, mTimeFrameGPU->getDevicePropagator(), - this->mTrkParams[0].CorrType, + this->mTrkParams[iteration].CorrType, mTimeFrameGPU->getFrameworkAllocator()); mTimeFrameGPU->createTrackITSExtDevice(trackSeeds.size()); computeTrackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(), @@ -321,65 +330,26 @@ void TrackerTraitsGPU::findRoads(const int iteration) mTimeFrameGPU->getDeviceTrackSeedsLUT(), this->mTrkParams[iteration].LayerRadii, this->mTrkParams[iteration].MinPt, + this->mTrkParams[iteration].LayerxX0, trackSeeds.size(), mTimeFrameGPU->getNTrackSeeds(), this->mBz, startLevel, - this->mTrkParams[0].MaxChi2ClusterAttachment, - this->mTrkParams[0].MaxChi2NDF, - this->mTrkParams[0].ReseedIfShorter, - this->mTrkParams[0].RepeatRefitOut, - this->mTrkParams[0].ShiftRefToCluster, + this->mTrkParams[iteration].MaxChi2ClusterAttachment, + this->mTrkParams[iteration].MaxChi2NDF, + this->mTrkParams[iteration].ReseedIfShorter, + this->mTrkParams[iteration].RepeatRefitOut, + this->mTrkParams[iteration].ShiftRefToCluster, mTimeFrameGPU->getDevicePropagator(), - this->mTrkParams[0].CorrType, + this->mTrkParams[iteration].CorrType, mTimeFrameGPU->getFrameworkAllocator()); mTimeFrameGPU->downloadTrackITSExtDevice(); auto& tracks = mTimeFrameGPU->getTrackITSExt(); - - for (auto& track : tracks) { - if (!track.getChi2()) { - continue; // this is to skip the unset tracks that are put at the beginning of the vector by the sorting. To see if this can be optimised. - } - int nShared = 0; - bool isFirstShared{false}; - for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) { - if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { - continue; - } - nShared += int(mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer))); - isFirstShared |= !iLayer && mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer)); - } - - if (nShared > this->mTrkParams[0].ClusterSharing) { - continue; - } - - bool firstCls{true}; - TimeEstBC ts; - for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) { - if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { - continue; - } - mTimeFrameGPU->markUsedCluster(iLayer, track.getClusterIndex(iLayer)); - int currentROF = mTimeFrameGPU->getClusterROF(iLayer, track.getClusterIndex(iLayer)); - auto rofTS = mTimeFrameGPU->getROFOverlapTableView().getLayer(iLayer).getROFTimeBounds(currentROF, true); - if (firstCls) { - ts = rofTS; - } else { - if (!ts.isCompatible(rofTS)) { - LOGP(fatal, "TS {}+/-{} are incompatible with {}+/-{}, this should not happen!", rofTS.getTimeStamp(), rofTS.getTimeStampError(), ts.getTimeStamp(), ts.getTimeStampError()); - } - ts += rofTS; - } - } - track.getTimeStamp() = ts.makeSymmetrical(); - track.setUserField(0); - track.getParamOut().setUserField(0); - mTimeFrameGPU->getTracks().emplace_back(track); - } + this->acceptTracks(iteration, tracks, firstClusters, sharedFirstClusters); mTimeFrameGPU->loadUsedClustersDevice(); } + this->markTracks(iteration, sharedFirstClusters); // wipe the artefact memory mTimeFrameGPU->popMemoryStack(iteration); }; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index adeba0b684e21..bc65ab71e000c 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -31,6 +31,7 @@ #include "ITStracking/Tracklet.h" #include "ITStracking/Cluster.h" #include "ITStracking/Cell.h" +#include "ITStracking/TrackHelpers.h" #include "DataFormatsITS/TrackITS.h" #include "ITStrackingGPU/TrackingKernels.h" #include "ITStrackingGPU/Utils.h" @@ -46,168 +47,6 @@ namespace o2::its namespace gpu { -GPUdii() bool fitTrack(TrackITSExt& track, - int start, - int end, - int step, - float chi2clcut, - float chi2ndfcut, - float maxQoverPt, - int nCl, - float bz, - const TrackingFrameInfo** tfInfos, - const o2::base::Propagator* prop, - o2::base::PropagatorF::MatCorrType matCorrType, - o2::track::TrackPar* linRef, - const bool shiftRefToCluster) -{ - for (int iLayer{start}; iLayer != end; iLayer += step) { - if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { - continue; - } - const TrackingFrameInfo& trackingHit = tfInfos[iLayer][track.getClusterIndex(iLayer)]; - if (linRef) { - if (!track.o2::track::TrackParCovF::rotate(trackingHit.alphaTrackingFrame, *linRef, bz)) { - return false; - } - if (!prop->propagateToX(track, - *linRef, - trackingHit.xTrackingFrame, - bz, - o2::base::PropagatorImpl::MAX_SIN_PHI, - o2::base::PropagatorImpl::MAX_STEP, - matCorrType)) { - - return false; - } - if (matCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { - const float xx0 = (iLayer > 2) ? 1.e-2f : 5.e-3f; // Rough layer thickness - if (!track.correctForMaterial(*linRef, xx0, xx0 * constants::Radl * constants::Rho, true)) { - return false; - } - } - } else { - if (!track.o2::track::TrackParCovF::rotate(trackingHit.alphaTrackingFrame)) { - return false; - } - if (!prop->propagateToX(track, - trackingHit.xTrackingFrame, - bz, - o2::base::PropagatorImpl::MAX_SIN_PHI, - o2::base::PropagatorImpl::MAX_STEP, - matCorrType)) { - return false; - } - if (matCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { - const float xx0 = (iLayer > 2) ? 1.e-2f : 5.e-3f; // Rough layer thickness - if (!track.correctForMaterial(xx0, xx0 * constants::Radl * constants::Rho, true)) { - return false; - } - } - } - - auto predChi2{track.getPredictedChi2(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)}; - if ((nCl >= 3 && predChi2 > chi2clcut) || predChi2 < 0.f) { - return false; - } - track.setChi2(track.getChi2() + predChi2); - if (!track.o2::track::TrackParCov::update(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)) { - return false; - } - if (linRef && shiftRefToCluster) { // displace the reference to the last updated cluster - linRef->setY(trackingHit.positionTrackingFrame[0]); - linRef->setZ(trackingHit.positionTrackingFrame[1]); - } - nCl++; - } - return o2::gpu::CAMath::Abs(track.getQ2Pt()) < maxQoverPt && track.getChi2() < chi2ndfcut * (nCl * 2 - 5); -} - -GPUdii() o2::track::TrackParCov buildTrackSeed(const Cluster& cluster1, - const Cluster& cluster2, - const TrackingFrameInfo& tf3, - const float bz, - const bool reverse = false) -{ - const float sign = reverse ? -1.f : 1.f; - - float ca, sa; - o2::gpu::CAMath::SinCos(tf3.alphaTrackingFrame, sa, ca); - - const float x1 = cluster1.xCoordinate * ca + cluster1.yCoordinate * sa; - const float y1 = -cluster1.xCoordinate * sa + cluster1.yCoordinate * ca; - const float x2 = cluster2.xCoordinate * ca + cluster2.yCoordinate * sa; - const float y2 = -cluster2.xCoordinate * sa + cluster2.yCoordinate * ca; - const float x3 = tf3.xTrackingFrame; - const float y3 = tf3.positionTrackingFrame[0]; - - float snp, q2pt, q2pt2; - if (o2::gpu::CAMath::Abs(bz) < 0.01f) { - const float dx = x3 - x1; - const float dy = y3 - y1; - snp = sign * dy / o2::gpu::CAMath::Hypot(dx, dy); - q2pt = 1.f / track::kMostProbablePt; - q2pt2 = 1.f; - } else { - const float crv = math_utils::computeCurvature(x3, y3, x2, y2, x1, y1); - snp = sign * crv * (x3 - math_utils::computeCurvatureCentreX(x3, y3, x2, y2, x1, y1)); - q2pt = sign * crv / (bz * o2::constants::math::B2C); - q2pt2 = crv * crv; - } - - const float tgl = 0.5f * (math_utils::computeTanDipAngle(x1, y1, x2, y2, cluster1.zCoordinate, cluster2.zCoordinate) + - math_utils::computeTanDipAngle(x2, y2, x3, y3, cluster2.zCoordinate, tf3.positionTrackingFrame[1])); - const float sg2q2pt = track::kC1Pt2max * (q2pt2 > 0.0005f ? (q2pt2 < 1.f ? q2pt2 : 1.f) : 0.0005f); - - return {x3, tf3.alphaTrackingFrame, {y3, tf3.positionTrackingFrame[1], snp, tgl, q2pt}, {tf3.covarianceTrackingFrame[0], tf3.covarianceTrackingFrame[1], tf3.covarianceTrackingFrame[2], 0.f, 0.f, track::kCSnp2max, 0.f, 0.f, 0.f, track::kCTgl2max, 0.f, 0.f, 0.f, 0.f, sg2q2pt}}; -} - -template -GPUdii() TrackITSExt seedTrackForRefit(const TrackSeed& seed, - const TrackingFrameInfo** foundTrackingFrameInfo, - const Cluster** unsortedClusters, - const float* layerRadii, - const float bz, - const int reseedIfShorter) -{ - TrackITSExt temporaryTrack(seed); - int lrMin = NLayers, lrMax = 0, lrMid = 0; - for (int iL{0}; iL < NLayers; ++iL) { - const int idx = seed.getCluster(iL); - temporaryTrack.setExternalClusterIndex(iL, idx, idx != constants::UnusedIndex); - if (idx != constants::UnusedIndex) { - // TODO only works if does not have holes - lrMin = o2::gpu::CAMath::Min(lrMin, iL); - lrMax = o2::gpu::CAMath::Max(lrMax, iL); - } - } - const int ncl = temporaryTrack.getNClusters(); - if (ncl < reseedIfShorter && ncl > 0) { // need to check if there are any clusters since we keep invalidate seeeds around - if (ncl == NLayers) { - lrMin = 0; - lrMax = NLayers - 1; - lrMid = (lrMin + lrMax) / 2; - } else { - lrMid = lrMin + 1; - float midR = 0.5f * (layerRadii[lrMax] + layerRadii[lrMin]), dstMidR = o2::gpu::CAMath::Abs(midR - layerRadii[lrMid]); - for (int iL = lrMid + 1; iL < lrMax; ++iL) { // find the midpoint as closest to the midR - auto dst = o2::gpu::GPUCommonMath::Abs(midR - layerRadii[iL]); - if (dst < dstMidR) { - lrMid = iL; - dstMidR = dst; - } - } - } - const auto& cluster0_tf = foundTrackingFrameInfo[lrMin][seed.getCluster(lrMin)]; - const auto& cluster1_gl = unsortedClusters[lrMid][seed.getCluster(lrMid)]; - const auto& cluster2_gl = unsortedClusters[lrMax][seed.getCluster(lrMax)]; - temporaryTrack.getParamIn() = buildTrackSeed(cluster2_gl, cluster1_gl, cluster0_tf, bz, true); - } - temporaryTrack.resetCovariance(); - temporaryTrack.setCov(temporaryTrack.getQ2Pt() * temporaryTrack.getQ2Pt() * temporaryTrack.getCov()[o2::track::CovLabels::kSigQ2Pt2], o2::track::CovLabels::kSigQ2Pt2); - return temporaryTrack; -} - struct sort_tracklets { GPUhd() bool operator()(const Tracklet& a, const Tracklet& b) { @@ -287,6 +126,7 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel( maybe_const* seedLUT, const float* layerRadii, const float* minPts, + const float* layerxX0, const unsigned int nSeeds, const float bz, const int startLevel, @@ -306,44 +146,50 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel( } } - TrackITSExt temporaryTrack = seedTrackForRefit(trackSeeds[iCurrentTrackSeedIndex], foundTrackingFrameInfo, unsortedClusters, layerRadii, bz, reseedIfShorter); + TrackITSExt temporaryTrack = o2::its::track::seedTrackForRefit(trackSeeds[iCurrentTrackSeedIndex], + foundTrackingFrameInfo, + unsortedClusters, + layerRadii, + bz, + reseedIfShorter); o2::track::TrackPar linRef{temporaryTrack}; - bool fitSuccess = fitTrack(temporaryTrack, // TrackITSExt& track, - 0, // int lastLayer, - NLayers, // int firstLayer, - 1, // int firstCluster, - maxChi2ClusterAttachment, // float maxChi2ClusterAttachment, - maxChi2NDF, // float maxChi2NDF, - o2::constants::math::VeryBig, // float maxQoverPt, - 0, // nCl, - bz, // float bz, - foundTrackingFrameInfo, // TrackingFrameInfo** trackingFrameInfo, - propagator, // const o2::base::Propagator* propagator, - matCorrType, // o2::base::PropagatorF::MatCorrType matCorrType - &linRef, - shifRefToCluster); + bool fitSuccess = o2::its::track::fitTrack(temporaryTrack, // TrackITSExt& track, + 0, // int lastLayer, + NLayers, // int firstLayer, + 1, // int firstCluster, + maxChi2ClusterAttachment, // float maxChi2ClusterAttachment, + maxChi2NDF, // float maxChi2NDF, + o2::constants::math::VeryBig, // float maxQoverPt, + 0, // nCl, + bz, // float bz, + foundTrackingFrameInfo, // TrackingFrameInfo** trackingFrameInfo, + layerxX0, // const float* layerxX0, + propagator, // const o2::base::Propagator* propagator, + matCorrType, // o2::base::PropagatorF::MatCorrType matCorrType + &linRef, + shifRefToCluster); if (!fitSuccess) { continue; } temporaryTrack.getParamOut() = temporaryTrack.getParamIn(); linRef = temporaryTrack.getParamOut(); // use refitted track as lin.reference - temporaryTrack.resetCovariance(); - temporaryTrack.setCov(temporaryTrack.getQ2Pt() * temporaryTrack.getQ2Pt() * temporaryTrack.getCov()[o2::track::CovLabels::kSigQ2Pt2], o2::track::CovLabels::kSigQ2Pt2); + track::resetTrackCovariance(temporaryTrack); temporaryTrack.setChi2(0); - fitSuccess = fitTrack(temporaryTrack, // TrackITSExt& track, - NLayers - 1, // int lastLayer, - -1, // int firstLayer, - -1, // int firstCluster, - maxChi2ClusterAttachment, // float maxChi2ClusterAttachment, - maxChi2NDF, // float maxChi2NDF, - 50.f, // float maxQoverPt, - 0, // nCl, - bz, // float bz, - foundTrackingFrameInfo, // TrackingFrameInfo** trackingFrameInfo, - propagator, // const o2::base::Propagator* propagator, - matCorrType, // o2::base::PropagatorF::MatCorrType matCorrType - &linRef, - shifRefToCluster); + fitSuccess = o2::its::track::fitTrack(temporaryTrack, // TrackITSExt& track, + NLayers - 1, // int lastLayer, + -1, // int firstLayer, + -1, // int firstCluster, + maxChi2ClusterAttachment, // float maxChi2ClusterAttachment, + maxChi2NDF, // float maxChi2NDF, + 50.f, // float maxQoverPt, + 0, // nCl, + bz, // float bz, + foundTrackingFrameInfo, // TrackingFrameInfo** trackingFrameInfo, + layerxX0, // const float* layerxX0, + propagator, // const o2::base::Propagator* propagator, + matCorrType, // o2::base::PropagatorF::MatCorrType matCorrType + &linRef, + shifRefToCluster); if (!fitSuccess || temporaryTrack.getPt() < minPts[NLayers - temporaryTrack.getNClusters()]) { continue; } @@ -351,23 +197,23 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel( o2::track::TrackParCov saveInw{temporaryTrack}; linRef = saveInw; // use refitted track as lin.reference float saveChi2 = temporaryTrack.getChi2(); - temporaryTrack.resetCovariance(); - temporaryTrack.setCov(temporaryTrack.getQ2Pt() * temporaryTrack.getQ2Pt() * temporaryTrack.getCov()[o2::track::CovLabels::kSigQ2Pt2], o2::track::CovLabels::kSigQ2Pt2); + track::resetTrackCovariance(temporaryTrack); temporaryTrack.setChi2(0); - fitSuccess = fitTrack(temporaryTrack, // TrackITSExt& track, - 0, // int lastLayer, - NLayers, // int firstLayer, - 1, // int firstCluster, - maxChi2ClusterAttachment, // float maxChi2ClusterAttachment, - maxChi2NDF, // float maxChi2NDF, - o2::constants::math::VeryBig, // float maxQoverPt, - 0, // nCl, - bz, // float bz, - foundTrackingFrameInfo, // TrackingFrameInfo** trackingFrameInfo, - propagator, // const o2::base::Propagator* propagator, - matCorrType, // o2::base::PropagatorF::MatCorrType matCorrType - &linRef, - shifRefToCluster); + fitSuccess = o2::its::track::fitTrack(temporaryTrack, // TrackITSExt& track, + 0, // int lastLayer, + NLayers, // int firstLayer, + 1, // int firstCluster, + maxChi2ClusterAttachment, // float maxChi2ClusterAttachment, + maxChi2NDF, // float maxChi2NDF, + o2::constants::math::VeryBig, // float maxQoverPt, + 0, // nCl, + bz, // float bz, + foundTrackingFrameInfo, // TrackingFrameInfo** trackingFrameInfo, + layerxX0, // const float* layerxX0, + propagator, // const o2::base::Propagator* propagator, + matCorrType, // o2::base::PropagatorF::MatCorrType matCorrType + &linRef, + shifRefToCluster); if (!fitSuccess) { continue; } @@ -451,12 +297,12 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( const int layer, CellSeed* cells, int** cellsLUTs, + const float* layerxX0, const float bz, const float maxChi2ClusterAttachment, const float cellDeltaTanLambdaSigma, const float nSigmaCut) { - constexpr float layerxX0[7] = {5.e-3f, 5.e-3f, 5.e-3f, 1.e-2f, 1.e-2f, 1.e-2f, 1.e-2f}; // FIXME: Hardcoded here for the moment. for (int iCurrentTrackletIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentTrackletIndex < nTrackletsCurrent; iCurrentTrackletIndex += blockDim.x * gridDim.x) { if constexpr (!initRun) { if (cellsLUTs[layer][iCurrentTrackletIndex] == cellsLUTs[layer][iCurrentTrackletIndex + 1]) { @@ -490,7 +336,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( const auto& cluster1_glo = unsortedClusters[layer][clusId[0]]; const auto& cluster2_glo = unsortedClusters[layer + 1][clusId[1]]; const auto& cluster3_tf = tfInfo[layer + 2][clusId[2]]; - auto track{buildTrackSeed(cluster1_glo, cluster2_glo, cluster3_tf, bz)}; + auto track{o2::its::track::buildTrackSeed(cluster1_glo, cluster2_glo, cluster3_tf, bz)}; float chi2{0.f}; bool good{false}; for (int iC{2}; iC--;) { @@ -620,7 +466,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (math_utils::Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + constants::Tolerance)}; /// protecting from overflows adding the detector resolution const float sigmaZ{o2::gpu::CAMath::Sqrt(math_utils::Sq(resolution) * math_utils::Sq(tanLambda) * ((math_utils::Sq(inverseR0) + sqInverseDeltaZ0) * math_utils::Sq(meanDeltaR) + 1.f) + math_utils::Sq(meanDeltaR * MSAngle))}; - const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; + const int4 selectedBinsRect{o2::its::getBinsRect(currentCluster, layerIndex + 1, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut, *utils)}; if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { continue; } @@ -631,7 +477,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( } for (short targetROF = rofOverlap.getFirstEntry(); targetROF < rofOverlap.getEntriesBound(); ++targetROF) { - if (!rofMask.isROFEnabled(layerIndex + 1, pivotROF)) { + if (!rofMask.isROFEnabled(layerIndex + 1, targetROF)) { continue; } auto clustersNextLayer = getClustersOnLayer(targetROF, totalROFs1, layerIndex + 1, ROFClusters, clusters); @@ -702,12 +548,12 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( int* neighbours, int* neighboursLUT, const TrackingFrameInfo** foundTrackingFrameInfo, + const float* layerxX0, const float bz, const float maxChi2ClusterAttachment, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType) { - constexpr float layerxX0[7] = {5.e-3f, 5.e-3f, 5.e-3f, 1.e-2f, 1.e-2f, 1.e-2f, 1.e-2f}; // FIXME: Hardcoded here for the moment. for (unsigned int iCurrentCell = blockIdx.x * blockDim.x + threadIdx.x; iCurrentCell < nCurrentCells; iCurrentCell += blockDim.x * gridDim.x) { if constexpr (!dryRun) { if (foundSeedsTable[iCurrentCell] == foundSeedsTable[iCurrentCell + 1]) { @@ -929,19 +775,22 @@ void countCellsHandler( const float maxChi2ClusterAttachment, const float cellDeltaTanLambdaSigma, const float nSigmaCut, + const std::vector& layerxX0Host, o2::its::ExternalAllocator* alloc, gpu::Streams& streams) { + thrust::device_vector layerxX0(layerxX0Host); gpu::computeLayerCellsKernel<<<60, 256, 0, streams[layer].get()>>>( - sortedClusters, // const Cluster** - unsortedClusters, // const Cluster** - tfInfo, // const TrackingFrameInfo** - tracklets, // const Tracklets** - trackletsLUT, // const int** - nTracklets, // const int - layer, // const int - cells, // CellSeed* - cellsLUTsArrayDevice, // int** + sortedClusters, // const Cluster** + unsortedClusters, // const Cluster** + tfInfo, // const TrackingFrameInfo** + tracklets, // const Tracklets** + trackletsLUT, // const int** + nTracklets, // const int + layer, // const int + cells, // CellSeed* + cellsLUTsArrayDevice, // int** + thrust::raw_pointer_cast(&layerxX0[0]), bz, // const float maxChi2ClusterAttachment, // const float cellDeltaTanLambdaSigma, // const float @@ -966,18 +815,21 @@ void computeCellsHandler( const float maxChi2ClusterAttachment, const float cellDeltaTanLambdaSigma, const float nSigmaCut, + const std::vector& layerxX0Host, gpu::Streams& streams) { + thrust::device_vector layerxX0(layerxX0Host); gpu::computeLayerCellsKernel<<<60, 256, 0, streams[layer].get()>>>( - sortedClusters, // const Cluster** - unsortedClusters, // const Cluster** - tfInfo, // const TrackingFrameInfo** - tracklets, // const Tracklets** - trackletsLUT, // const int** - nTracklets, // const int - layer, // const int - cells, // CellSeed* - cellsLUTsArrayDevice, // int** + sortedClusters, // const Cluster** + unsortedClusters, // const Cluster** + tfInfo, // const TrackingFrameInfo** + tracklets, // const Tracklets** + trackletsLUT, // const int** + nTracklets, // const int + layer, // const int + cells, // CellSeed* + cellsLUTsArrayDevice, // int** + thrust::raw_pointer_cast(&layerxX0[0]), bz, // const float maxChi2ClusterAttachment, // const float cellDeltaTanLambdaSigma, // const float @@ -1076,6 +928,7 @@ void processNeighboursHandler(const int startLayer, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, + const std::vector& layerxX0Host, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator* alloc) @@ -1084,6 +937,7 @@ void processNeighboursHandler(const int startLayer, alloc->pushTagOnStack(Tag); auto allocInt = gpu::TypedAllocator(alloc); auto allocTrackSeed = gpu::TypedAllocator>(alloc); + thrust::device_vector layerxX0(layerxX0Host); thrust::device_vector> foundSeedsTable(nCells[startLayer] + 1, 0, allocInt); auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(gpu::Stream::DefaultStream); @@ -1101,6 +955,7 @@ void processNeighboursHandler(const int startLayer, neighbours[startLayer - 1], neighboursDeviceLUTs[startLayer - 1], foundTrackingFrameInfo, + thrust::raw_pointer_cast(&layerxX0[0]), bz, maxChi2ClusterAttachment, propagator, @@ -1123,6 +978,7 @@ void processNeighboursHandler(const int startLayer, neighbours[startLayer - 1], neighboursDeviceLUTs[startLayer - 1], foundTrackingFrameInfo, + thrust::raw_pointer_cast(&layerxX0[0]), bz, maxChi2ClusterAttachment, propagator, @@ -1155,6 +1011,7 @@ void processNeighboursHandler(const int startLayer, neighbours[iLayer - 1], neighboursDeviceLUTs[iLayer - 1], foundTrackingFrameInfo, + thrust::raw_pointer_cast(&layerxX0[0]), bz, maxChi2ClusterAttachment, propagator, @@ -1181,6 +1038,7 @@ void processNeighboursHandler(const int startLayer, neighbours[iLayer - 1], neighboursDeviceLUTs[iLayer - 1], foundTrackingFrameInfo, + thrust::raw_pointer_cast(&layerxX0[0]), bz, maxChi2ClusterAttachment, propagator, @@ -1202,6 +1060,7 @@ void countTrackSeedHandler(TrackSeed* trackSeeds, int* seedLUT, const std::vector& layerRadiiHost, const std::vector& minPtsHost, + const std::vector& layerxX0Host, const unsigned int nSeeds, const float bz, const int startLevel, @@ -1219,6 +1078,7 @@ void countTrackSeedHandler(TrackSeed* trackSeeds, // small transferes! thrust::device_vector minPts(minPtsHost); thrust::device_vector layerRadii(layerRadiiHost); + thrust::device_vector layerxX0(layerxX0Host); gpu::fitTrackSeedsKernel<<<60, 256>>>( trackSeeds, // CellSeed* foundTrackingFrameInfo, // TrackingFrameInfo** @@ -1227,6 +1087,7 @@ void countTrackSeedHandler(TrackSeed* trackSeeds, seedLUT, // int* thrust::raw_pointer_cast(&layerRadii[0]), // const float* thrust::raw_pointer_cast(&minPts[0]), // const float* + thrust::raw_pointer_cast(&layerxX0[0]), // const float* nSeeds, // const unsigned int bz, // const float startLevel, // const int @@ -1249,6 +1110,7 @@ void computeTrackSeedHandler(TrackSeed* trackSeeds, const int* seedLUT, const std::vector& layerRadiiHost, const std::vector& minPtsHost, + const std::vector& layerxX0Host, const unsigned int nSeeds, const unsigned int nTracks, const float bz, @@ -1264,6 +1126,7 @@ void computeTrackSeedHandler(TrackSeed* trackSeeds, { thrust::device_vector minPts(minPtsHost); thrust::device_vector layerRadii(layerRadiiHost); + thrust::device_vector layerxX0(layerxX0Host); gpu::fitTrackSeedsKernel<<<60, 256>>>( trackSeeds, // CellSeed* foundTrackingFrameInfo, // TrackingFrameInfo** @@ -1272,6 +1135,7 @@ void computeTrackSeedHandler(TrackSeed* trackSeeds, seedLUT, // const int* thrust::raw_pointer_cast(&layerRadii[0]), // const float* thrust::raw_pointer_cast(&minPts[0]), // const float* + thrust::raw_pointer_cast(&layerxX0[0]), // const float* nSeeds, // const unsigned int bz, // const float startLevel, // const int @@ -1359,6 +1223,7 @@ template void countCellsHandler<7>(const Cluster** sortedClusters, const float maxChi2ClusterAttachment, const float cellDeltaTanLambdaSigma, const float nSigmaCut, + const std::vector& layerxX0Host, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); @@ -1376,6 +1241,7 @@ template void computeCellsHandler<7>(const Cluster** sortedClusters, const float maxChi2ClusterAttachment, const float cellDeltaTanLambdaSigma, const float nSigmaCut, + const std::vector& layerxX0Host, gpu::Streams& streams); template void countCellNeighboursHandler<7>(CellSeed** cellsLayersDevice, @@ -1420,6 +1286,7 @@ template void processNeighboursHandler<7>(const int startLayer, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, + const std::vector& layerxX0Host, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator* alloc); @@ -1430,6 +1297,7 @@ template void countTrackSeedHandler(TrackSeed<7>* trackSeeds, int* seedLUT, const std::vector& layerRadiiHost, const std::vector& minPtsHost, + const std::vector& layerxX0Host, const unsigned int nSeeds, const float bz, const int startLevel, @@ -1449,6 +1317,7 @@ template void computeTrackSeedHandler(TrackSeed<7>* trackSeeds, const int* seedLUT, const std::vector& layerRadiiHost, const std::vector& minPtsHost, + const std::vector& layerxX0Host, const unsigned int nSeeds, const unsigned int nTracks, const float bz, diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/IndexTableUtils.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/IndexTableUtils.h index 118557c970c35..e2487208e9453 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/IndexTableUtils.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/IndexTableUtils.h @@ -18,9 +18,8 @@ #include -#include "ITStracking/Constants.h" -#include "ITStracking/Configuration.h" -#include "ITStracking/Definitions.h" +#include "ITStracking/Cluster.h" +#include "ITStracking/MathUtils.h" #include "CommonConstants/MathConstants.h" #include "GPUCommonMath.h" #include "GPUCommonDef.h" @@ -91,7 +90,7 @@ GPUhdi() int IndexTableUtils::getPhiBinIndex(const float currentPhi) co template GPUhdi() int IndexTableUtils::getBinIndex(const int zIndex, const int phiIndex) const { - return o2::gpu::GPUCommonMath::Min(phiIndex * mNzBins + zIndex, mNzBins * mNphiBins - 1); + return o2::gpu::GPUCommonMath::Min(phiIndex * mNzBins + zIndex, (mNzBins * mNphiBins) - 1); } template @@ -113,5 +112,26 @@ GPUhdi() void IndexTableUtils::print() const } } +template +GPUhdi() int4 getBinsRect(const Cluster& currentCluster, const int layerIndex, + const float z1, const float z2, const float maxdeltaz, const float maxdeltaphi, + const IndexTableUtils& utils) +{ + const float zRangeMin = o2::gpu::GPUCommonMath::Min(z1, z2) - maxdeltaz; + const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi; + const float zRangeMax = o2::gpu::GPUCommonMath::Max(z1, z2) + maxdeltaz; + const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi; + + if (zRangeMax < -utils.getLayerZ(layerIndex) || + zRangeMin > utils.getLayerZ(layerIndex) || zRangeMin > zRangeMax) { + return int4{0, 0, 0, 0}; + } + + return int4{o2::gpu::GPUCommonMath::Max(0, utils.getZBinIndex(layerIndex, zRangeMin)), + utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)), + o2::gpu::GPUCommonMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex, zRangeMax)), + utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; +} + } // namespace o2::its #endif /* TRACKINGITSU_INCLUDE_INDEXTABLEUTILS_H_ */ diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h new file mode 100644 index 0000000000000..4543fee0aa29d --- /dev/null +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h @@ -0,0 +1,209 @@ +// Copyright 2019-2026 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. +/// +/// \file TrackHelpers.h +/// \brief Shared host/device helpers for ITS tracker trait implementations +/// + +#ifndef O2_ITS_TRACKING_TRACKHELPERS_H_ +#define O2_ITS_TRACKING_TRACKHELPERS_H_ + +#include +#include + +#include "DataFormatsITS/TrackITS.h" +#include "GPUCommonLogger.h" +#include "ITStracking/Cell.h" +#include "ITStracking/Cluster.h" +#include "ITStracking/Constants.h" +#include "ITStracking/MathUtils.h" +#include "ITStracking/TimeFrame.h" +#include "DetectorsBase/Propagator.h" +#include "ReconstructionDataFormats/Track.h" + +namespace o2::its::track +{ + +GPUhdi() int selectReseedMidLayer(int minLayer, int maxLayer, int nLayers, const float* layerRadii) +{ + if (maxLayer - minLayer == nLayers - 1) { + return (minLayer + maxLayer) / 2; + } + + int midLayer = minLayer + 1; + const float midR = 0.5f * (layerRadii[maxLayer] + layerRadii[minLayer]); + float distanceToMidR = o2::gpu::CAMath::Abs(midR - layerRadii[midLayer]); + for (int iLayer = midLayer + 1; iLayer < maxLayer; ++iLayer) { + const float distance = o2::gpu::CAMath::Abs(midR - layerRadii[iLayer]); + if (distance < distanceToMidR) { + midLayer = iLayer; + distanceToMidR = distance; + } + } + return midLayer; +} + +GPUhdi() void resetTrackCovariance(TrackITSExt& track) +{ + track.resetCovariance(); + track.setCov(track.getQ2Pt() * track.getQ2Pt() * track.getCov()[o2::track::CovLabels::kSigQ2Pt2], o2::track::CovLabels::kSigQ2Pt2); +} + +GPUhdi() o2::track::TrackParCov buildTrackSeed(const Cluster& cluster1, + const Cluster& cluster2, + const TrackingFrameInfo& tf3, + const float bz, + const bool reverse = false) +{ + const float sign = reverse ? -1.f : 1.f; + + float ca = NAN; + float sa = NAN; + o2::gpu::CAMath::SinCos(tf3.alphaTrackingFrame, sa, ca); + + const float x1 = (cluster1.xCoordinate * ca) + (cluster1.yCoordinate * sa); + const float y1 = (-cluster1.xCoordinate * sa) + (cluster1.yCoordinate * ca); + const float x2 = (cluster2.xCoordinate * ca) + (cluster2.yCoordinate * sa); + const float y2 = (-cluster2.xCoordinate * sa) + (cluster2.yCoordinate * ca); + const float x3 = tf3.xTrackingFrame; + const float y3 = tf3.positionTrackingFrame[0]; + + float snp = NAN; + float q2pt = NAN; + float q2pt2 = NAN; + if (o2::gpu::CAMath::Abs(bz) < 0.01f) { + const float dx = x3 - x1; + const float dy = y3 - y1; + snp = sign * dy / o2::gpu::CAMath::Hypot(dx, dy); + q2pt = 1.f / o2::track::kMostProbablePt; + q2pt2 = 1.f; + } else { + const float crv = math_utils::computeCurvature(x3, y3, x2, y2, x1, y1); + snp = sign * crv * (x3 - math_utils::computeCurvatureCentreX(x3, y3, x2, y2, x1, y1)); + q2pt = sign * crv / (bz * o2::constants::math::B2C); + q2pt2 = crv * crv; + } + + const float tgl = 0.5f * (math_utils::computeTanDipAngle(x1, y1, x2, y2, cluster1.zCoordinate, cluster2.zCoordinate) + + math_utils::computeTanDipAngle(x2, y2, x3, y3, cluster2.zCoordinate, tf3.positionTrackingFrame[1])); + const float sg2q2pt = o2::track::kC1Pt2max * (q2pt2 > 0.0005f ? (q2pt2 < 1.f ? q2pt2 : 1.f) : 0.0005f); + + return {x3, tf3.alphaTrackingFrame, {y3, tf3.positionTrackingFrame[1], snp, tgl, q2pt}, {tf3.covarianceTrackingFrame[0], tf3.covarianceTrackingFrame[1], tf3.covarianceTrackingFrame[2], 0.f, 0.f, o2::track::kCSnp2max, 0.f, 0.f, 0.f, o2::track::kCTgl2max, 0.f, 0.f, 0.f, 0.f, sg2q2pt}}; +} + +template +GPUhdi() TrackITSExt seedTrackForRefit(const TrackSeed& seed, + const TrackingFrameInfo* const* foundTrackingFrameInfo, + const Cluster* const* unsortedClusters, + const float* layerRadii, + const float bz, + const int reseedIfShorter) +{ + TrackITSExt temporaryTrack(seed); + int lrMin = NLayers; + int lrMax = 0; + for (int iL{0}; iL < NLayers; ++iL) { + const int idx = seed.getCluster(iL); + temporaryTrack.setExternalClusterIndex(iL, idx, idx != constants::UnusedIndex); + if (idx != constants::UnusedIndex) { + lrMin = o2::gpu::CAMath::Min(lrMin, iL); + lrMax = o2::gpu::CAMath::Max(lrMax, iL); + } + } + + const int ncl = temporaryTrack.getNClusters(); + if (ncl < reseedIfShorter && ncl > 1) { + const int lrMid = selectReseedMidLayer(lrMin, lrMax, NLayers, layerRadii); + const auto& cluster0TF = foundTrackingFrameInfo[lrMin][seed.getCluster(lrMin)]; + const auto& cluster1GL = unsortedClusters[lrMid][seed.getCluster(lrMid)]; + const auto& cluster2GL = unsortedClusters[lrMax][seed.getCluster(lrMax)]; + temporaryTrack.getParamIn() = buildTrackSeed(cluster2GL, cluster1GL, cluster0TF, bz, true); + } + + resetTrackCovariance(temporaryTrack); + return temporaryTrack; +} + +GPUhdi() bool fitTrack(TrackITSExt& trk, + int start, + int end, + int step, + float chi2clcut, + float chi2ndfcut, + float maxQoverPt, + int nCl, + const float bz, + const TrackingFrameInfo* const* tfInfos, + const float* layerxX0, + const o2::base::Propagator* propagator, + const o2::base::PropagatorF::MatCorrType matCorrType, + o2::track::TrackPar* linRef = nullptr, + const bool shiftRefToCluster = false) +{ + for (int iLayer{start}; iLayer != end; iLayer += step) { + if (trk.getClusterIndex(iLayer) == constants::UnusedIndex) { + continue; + } + + const TrackingFrameInfo& trackingHit = tfInfos[iLayer][trk.getClusterIndex(iLayer)]; + if (linRef) { + if (!trk.o2::track::TrackParCovF::rotate(trackingHit.alphaTrackingFrame, *linRef, bz)) { + return false; + } + if (!propagator->propagateToX(trk, *linRef, trackingHit.xTrackingFrame, bz, + o2::base::PropagatorImpl::MAX_SIN_PHI, + o2::base::PropagatorImpl::MAX_STEP, + matCorrType)) { + return false; + } + if (matCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { + if (!trk.correctForMaterial(*linRef, layerxX0[iLayer], layerxX0[iLayer] * constants::Radl * constants::Rho, true)) { + continue; + } + } + } else { + if (!trk.o2::track::TrackParCovF::rotate(trackingHit.alphaTrackingFrame)) { + return false; + } + if (!propagator->propagateToX(trk, trackingHit.xTrackingFrame, bz, + o2::base::PropagatorImpl::MAX_SIN_PHI, + o2::base::PropagatorImpl::MAX_STEP, + matCorrType)) { + return false; + } + if (matCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { + if (!trk.correctForMaterial(layerxX0[iLayer], layerxX0[iLayer] * constants::Radl * constants::Rho, true)) { + continue; + } + } + } + + const auto predChi2{trk.getPredictedChi2Quiet(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)}; + if ((nCl >= 3 && predChi2 > chi2clcut) || predChi2 < 0.f) { + return false; + } + trk.setChi2(trk.getChi2() + predChi2); + if (!trk.o2::track::TrackParCov::update(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)) { + return false; + } + if (linRef && shiftRefToCluster) { + linRef->setY(trackingHit.positionTrackingFrame[0]); + linRef->setZ(trackingHit.positionTrackingFrame[1]); + } + nCl++; + } + + return o2::gpu::CAMath::Abs(trk.getQ2Pt()) < maxQoverPt && trk.getChi2() < chi2ndfcut * (float)((nCl * 2) - 5); +} + +} // namespace o2::its::track + +#endif diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index 92c652ce9bcb1..1c3c642429686 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -20,7 +20,6 @@ #include "DetectorsBase/Propagator.h" #include "ITStracking/Configuration.h" -#include "ITStracking/MathUtils.h" #include "ITStracking/IndexTableUtils.h" #include "ITStracking/TimeFrame.h" #include "ITStracking/Cell.h" @@ -55,9 +54,15 @@ class TrackerTraits virtual void findRoads(const int iteration); template - void processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId); + void processNeighbours(int iteration, int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId); - void updateTrackingParameters(const std::vector& trkPars) { mTrkParams = trkPars; } + void acceptTracks(int iteration, bounded_vector& tracks, bounded_vector>& firstClusters, bounded_vector>& sharedFirstClusters); + void markTracks(int iteration, bounded_vector>& sharedFirstClusters); + + void updateTrackingParameters(const std::vector& trkPars) + { + mTrkParams = trkPars; + } TimeFrame* getTimeFrame() { return mTimeFrame; } virtual void setBz(float bz); @@ -68,12 +73,6 @@ class TrackerTraits auto getMemoryPool() const noexcept { return mMemoryPool; } // Others - GPUhd() static consteval int4 getEmptyBinsRect() { return int4{0, 0, 0, 0}; } - int4 getBinsRect(const int iteration, int layer, float phi, float maxdeltaphi, float z, float maxdeltaz) - const noexcept { return getBinsRect(iteration, layer, phi, maxdeltaphi, z, z, maxdeltaz); } - int4 getBinsRect(const int iteration, const Cluster& cls, int layer, float z1, float z2, float maxdeltaz, float maxdeltaphi) const noexcept { return getBinsRect(iteration, layer, cls.phi, maxdeltaphi, z1, z2, maxdeltaz); } - const int4 getBinsRect(const int iteration, int layer, float phi, float maxdeltaphi, float z1, float z2, float maxdeltaz) const noexcept; - void setNThreads(int n, std::shared_ptr& arena); int getNThreads() { return mTaskArena->max_concurrency(); } @@ -83,10 +82,6 @@ class TrackerTraits virtual int getTFNumberOfCells() const { return mTimeFrame->getNumberOfCells(); } private: - track::TrackParCov buildTrackSeed(const Cluster& cluster1, const Cluster& cluster2, const TrackingFrameInfo& tf3, bool reverse = false); - TrackITSExt seedTrackForRefit(const TrackSeedN& seed); - bool fitTrack(TrackITSExt& track, int start, int end, int step, float chi2clcut = o2::constants::math::VeryBig, float chi2ndfcut = o2::constants::math::VeryBig, float maxQoverPt = o2::constants::math::VeryBig, int nCl = 0, o2::track::TrackPar* refLin = nullptr); - std::shared_ptr mMemoryPool; std::shared_ptr mTaskArena; @@ -96,29 +91,8 @@ class TrackerTraits std::vector mTrkParams; float mBz{-999.f}; - bool mIsZeroField{false}; }; -template -inline const int4 TrackerTraits::getBinsRect(const int iteration, const int layerIndex, float phi, float maxdeltaphi, float z1, float z2, float maxdeltaz) const noexcept -{ - const float zRangeMin = o2::gpu::GPUCommonMath::Min(z1, z2) - maxdeltaz; - const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : phi - maxdeltaphi; - const float zRangeMax = o2::gpu::GPUCommonMath::Max(z1, z2) + maxdeltaz; - const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : phi + maxdeltaphi; - - if (zRangeMax < -mTrkParams[iteration].LayerZ[layerIndex] || - zRangeMin > mTrkParams[iteration].LayerZ[layerIndex] || zRangeMin > zRangeMax) { - return getEmptyBinsRect(); - } - - const IndexTableUtilsN& utils{mTimeFrame->getIndexTableUtils()}; - return int4{o2::gpu::GPUCommonMath::Max(0, utils.getZBinIndex(layerIndex, zRangeMin)), - utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)), - o2::gpu::GPUCommonMath::Min(mTrkParams[iteration].ZBins - 1, utils.getZBinIndex(layerIndex, zRangeMax)), - utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; -} - } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 68e67ab3c6949..ed198dd8e8658 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -31,6 +31,7 @@ #include "ITStracking/IndexTableUtils.h" #include "ITStracking/ROFLookupTables.h" #include "ITStracking/TrackerTraits.h" +#include "ITStracking/TrackHelpers.h" #include "ITStracking/Tracklet.h" #include "ReconstructionDataFormats/Track.h" @@ -110,7 +111,9 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer const float sqInvDeltaZ0 = 1.f / (math_utils::Sq(currentCluster.zCoordinate - pv.getZ()) + constants::Tolerance); const float sigmaZ = o2::gpu::CAMath::Sqrt( math_utils::Sq(resolution) * math_utils::Sq(tanLambda) * ((math_utils::Sq(inverseR0) + sqInvDeltaZ0) * math_utils::Sq(meanDeltaR) + 1.f) + math_utils::Sq(meanDeltaR * mTimeFrame->getMSangle(iLayer))); - const auto bins = getBinsRect(iteration, currentCluster, iLayer + 1, zAtRmin, zAtRmax, sigmaZ * mTrkParams[iteration].NSigmaCut, mTimeFrame->getPhiCut(iLayer)); + const auto bins = o2::its::getBinsRect(currentCluster, iLayer + 1, zAtRmin, zAtRmax, + sigmaZ * mTrkParams[iteration].NSigmaCut, mTimeFrame->getPhiCut(iLayer), + mTimeFrame->getIndexTableUtils()); if (bins.x == 0 && bins.y == 0 && bins.z == 0 && bins.w == 0) { continue; } @@ -294,7 +297,7 @@ void TrackerTraits::computeLayerCells(const int iteration) const auto& cluster1_glo = mTimeFrame->getUnsortedClusters()[iLayer][clusId[0]]; const auto& cluster2_glo = mTimeFrame->getUnsortedClusters()[iLayer + 1][clusId[1]]; const auto& cluster3_tf = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer + 2)[clusId[2]]; - auto track{buildTrackSeed(cluster1_glo, cluster2_glo, cluster3_tf)}; + auto track{o2::its::track::buildTrackSeed(cluster1_glo, cluster2_glo, cluster3_tf, mBz)}; float chi2{0.f}; bool good{false}; @@ -309,7 +312,7 @@ void TrackerTraits::computeLayerCells(const int iteration) break; } - if (!track.correctForMaterial(mTrkParams[0].LayerxX0[iLayer + iC], mTrkParams[0].LayerxX0[iLayer + iC] * constants::Radl * constants::Rho, true)) { + if (!track.correctForMaterial(mTrkParams[iteration].LayerxX0[iLayer + iC], mTrkParams[iteration].LayerxX0[iLayer + iC] * constants::Radl * constants::Rho, true)) { break; } @@ -535,7 +538,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) template template -void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds) +void TrackerTraits::processNeighbours(int iteration, int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds) { auto propagator = o2::base::Propagator::Instance(); @@ -584,18 +587,18 @@ void TrackerTraits::processNeighbours(int iLayer, int iLevel, const bou continue; } - if (!propagator->propagateToX(seed, trHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl::MAX_SIN_PHI, o2::base::PropagatorImpl::MAX_STEP, mTrkParams[0].CorrType)) { + if (!propagator->propagateToX(seed, trHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl::MAX_SIN_PHI, o2::base::PropagatorImpl::MAX_STEP, mTrkParams[iteration].CorrType)) { continue; } - if (mTrkParams[0].CorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { - if (!seed.correctForMaterial(mTrkParams[0].LayerxX0[iLayer - 1], mTrkParams[0].LayerxX0[iLayer - 1] * constants::Radl * constants::Rho, true)) { + if (mTrkParams[iteration].CorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { + if (!seed.correctForMaterial(mTrkParams[iteration].LayerxX0[iLayer - 1], mTrkParams[iteration].LayerxX0[iLayer - 1] * constants::Radl * constants::Rho, true)) { continue; } } auto predChi2{seed.getPredictedChi2Quiet(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)}; - if ((predChi2 > mTrkParams[0].MaxChi2ClusterAttachment) || predChi2 < 0.f) { + if ((predChi2 > mTrkParams[iteration].MaxChi2ClusterAttachment) || predChi2 < 0.f) { continue; } seed.setChi2(seed.getChi2() + predChi2); @@ -662,10 +665,17 @@ void TrackerTraits::findRoads(const int iteration) bounded_vector> sharedFirstClusters(mTrkParams[iteration].NLayers, bounded_vector(mMemoryPool.get()), mMemoryPool.get()); firstClusters.resize(mTrkParams[iteration].NLayers); sharedFirstClusters.resize(mTrkParams[iteration].NLayers); + const auto propagator = o2::base::Propagator::Instance(); + const TrackingFrameInfo* tfInfos[NLayers]{}; + const Cluster* unsortedClusters[NLayers]{}; + for (int iLayer = 0; iLayer < NLayers; ++iLayer) { + tfInfos[iLayer] = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer).data(); + unsortedClusters[iLayer] = mTimeFrame->getUnsortedClusters()[iLayer].data(); + } for (int startLevel{mTrkParams[iteration].CellsPerRoad()}; startLevel >= mTrkParams[iteration].CellMinimumLevel(); --startLevel) { auto seedFilter = [&](const auto& seed) { - return seed.getQ2Pt() <= 1.e3 && seed.getChi2() <= mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5); + return seed.getQ2Pt() <= 1.e3 && seed.getChi2() <= mTrkParams[iteration].MaxChi2NDF * ((startLevel + 2) * 2 - 5); }; bounded_vector trackSeeds(mMemoryPool.get()); @@ -677,7 +687,7 @@ void TrackerTraits::findRoads(const int iteration) bounded_vector lastCellId(mMemoryPool.get()), updatedCellId(mMemoryPool.get()); bounded_vector lastCellSeed(mMemoryPool.get()), updatedCellSeed(mMemoryPool.get()); - processNeighbours(startLayer, startLevel, mTimeFrame->getCells()[startLayer], lastCellId, updatedCellSeed, updatedCellId); + processNeighbours(iteration, startLayer, startLevel, mTimeFrame->getCells()[startLayer], lastCellId, updatedCellSeed, updatedCellId); int level = startLevel; for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { @@ -685,7 +695,7 @@ void TrackerTraits::findRoads(const int iteration) lastCellId.swap(updatedCellId); deepVectorClear(updatedCellSeed); /// tame the memory peaks deepVectorClear(updatedCellId); /// tame the memory peaks - processNeighbours(iLayer, --level, lastCellSeed, lastCellId, updatedCellSeed, updatedCellId); + processNeighbours(iteration, iLayer, --level, lastCellSeed, lastCellId, updatedCellSeed, updatedCellId); } deepVectorClear(lastCellId); /// tame the memory peaks deepVectorClear(lastCellSeed); /// tame the memory peaks @@ -703,29 +713,74 @@ void TrackerTraits::findRoads(const int iteration) bounded_vector tracks(mMemoryPool.get()); mTaskArena->execute([&] { auto forSeed = [&](auto Tag, int iSeed, int offset = 0) { - TrackITSExt temporaryTrack = seedTrackForRefit(trackSeeds[iSeed]); + TrackITSExt temporaryTrack = o2::its::track::seedTrackForRefit(trackSeeds[iSeed], + tfInfos, + unsortedClusters, + mTrkParams[iteration].LayerRadii.data(), + mBz, + mTrkParams[iteration].ReseedIfShorter); o2::track::TrackPar linRef{temporaryTrack}; - bool fitSuccess = fitTrack(temporaryTrack, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, o2::constants::math::VeryBig, 0, &linRef); + bool fitSuccess = o2::its::track::fitTrack(temporaryTrack, + 0, + mTrkParams[iteration].NLayers, + 1, + mTrkParams[iteration].MaxChi2ClusterAttachment, + mTrkParams[iteration].MaxChi2NDF, + o2::constants::math::VeryBig, + 0, + mBz, + tfInfos, + mTrkParams[iteration].LayerxX0.data(), + propagator, + mTrkParams[iteration].CorrType, + &linRef, + mTrkParams[iteration].ShiftRefToCluster); if (!fitSuccess) { return 0; } temporaryTrack.getParamOut() = temporaryTrack.getParamIn(); linRef = temporaryTrack.getParamOut(); // use refitted track as lin.reference - temporaryTrack.resetCovariance(); - temporaryTrack.setCov(temporaryTrack.getQ2Pt() * temporaryTrack.getQ2Pt() * temporaryTrack.getCov()[o2::track::CovLabels::kSigQ2Pt2], o2::track::CovLabels::kSigQ2Pt2); + track::resetTrackCovariance(temporaryTrack); temporaryTrack.setChi2(0); - fitSuccess = fitTrack(temporaryTrack, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f, 0, &linRef); + fitSuccess = o2::its::track::fitTrack(temporaryTrack, + mTrkParams[iteration].NLayers - 1, + -1, + -1, + mTrkParams[iteration].MaxChi2ClusterAttachment, + mTrkParams[iteration].MaxChi2NDF, + 50.f, + 0, + mBz, + tfInfos, + mTrkParams[iteration].LayerxX0.data(), + propagator, + mTrkParams[iteration].CorrType, + &linRef, + mTrkParams[iteration].ShiftRefToCluster); if (!fitSuccess || temporaryTrack.getPt() < mTrkParams[iteration].MinPt[mTrkParams[iteration].NLayers - temporaryTrack.getNClusters()]) { return 0; } - if (mTrkParams[0].RepeatRefitOut) { // repeat outward refit seeding and linearizing with the stable inward fit result + if (mTrkParams[iteration].RepeatRefitOut) { // repeat outward refit seeding and linearizing with the stable inward fit result o2::track::TrackParCov saveInw{temporaryTrack}; linRef = saveInw; // use refitted track as lin.reference float saveChi2 = temporaryTrack.getChi2(); - temporaryTrack.resetCovariance(); - temporaryTrack.setCov(temporaryTrack.getQ2Pt() * temporaryTrack.getQ2Pt() * temporaryTrack.getCov()[o2::track::CovLabels::kSigQ2Pt2], o2::track::CovLabels::kSigQ2Pt2); + track::resetTrackCovariance(temporaryTrack); temporaryTrack.setChi2(0); - fitSuccess = fitTrack(temporaryTrack, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, o2::constants::math::VeryBig, 0, &linRef); + fitSuccess = o2::its::track::fitTrack(temporaryTrack, + 0, + mTrkParams[iteration].NLayers, + 1, + mTrkParams[iteration].MaxChi2ClusterAttachment, + mTrkParams[iteration].MaxChi2NDF, + o2::constants::math::VeryBig, + 0, + mBz, + tfInfos, + mTrkParams[iteration].LayerxX0.data(), + propagator, + mTrkParams[iteration].CorrType, + &linRef, + mTrkParams[iteration].ShiftRefToCluster); if (!fitSuccess) { return 0; } @@ -752,6 +807,7 @@ void TrackerTraits::findRoads(const int iteration) forSeed(PassMode::OnePass{}, iSeed); } } else { + // The double-pass allows us to avoid sizeable memory spikes bounded_vector perSeedCount(nSeeds + 1, 0, mMemoryPool.get()); tbb::parallel_for(0, nSeeds, [&](const int iSeed) { perSeedCount[iSeed] = forSeed(PassMode::TwoPassCount{}, iSeed); @@ -779,221 +835,104 @@ void TrackerTraits::findRoads(const int iteration) return a.getChi2() < b.getChi2(); }); - mTimeFrame->getTracks().reserve(mTimeFrame->getTracks().size() + tracks.size()); - const float smallestROFHalf = mTimeFrame->getROFOverlapTableView().getClockLayer().mROFLength * 0.5f; - for (auto& track : tracks) { - int nShared = 0; - bool isFirstShared{false}; - int firstLayer{-1}, firstCluster{-1}; - for (int iLayer{0}; iLayer < mTrkParams[0].NLayers; ++iLayer) { - if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { - continue; - } - bool isShared = mTimeFrame->isClusterUsed(iLayer, track.getClusterIndex(iLayer)); - nShared += int(isShared); - if (firstLayer < 0) { - firstCluster = track.getClusterIndex(iLayer); - isFirstShared = isShared && mTrkParams[0].AllowSharingFirstCluster && std::find(firstClusters[iLayer].begin(), firstClusters[iLayer].end(), firstCluster) != firstClusters[iLayer].end(); - firstLayer = iLayer; - } - } + acceptTracks(iteration, tracks, firstClusters, sharedFirstClusters); + } + markTracks(iteration, sharedFirstClusters); +} - /// do not account for the first cluster in the shared clusters number if it is allowed - if (nShared - int(isFirstShared && mTrkParams[0].AllowSharingFirstCluster) > mTrkParams[0].ClusterSharing) { +template +void TrackerTraits::acceptTracks(int iteration, bounded_vector& tracks, bounded_vector>& firstClusters, bounded_vector>& sharedFirstClusters) +{ + const float smallestROFHalf = mTimeFrame->getROFOverlapTableView().getClockLayer().mROFLength * 0.5f; + for (auto& track : tracks) { + int nShared = 0; + bool isFirstShared{false}; + int firstLayer{-1}, firstCluster{-1}; + for (int iLayer{0}; iLayer < mTrkParams[iteration].NLayers; ++iLayer) { + if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { continue; } - - bool firstCls{true}; - TimeEstBC ts; - for (int iLayer{0}; iLayer < mTrkParams[0].NLayers; ++iLayer) { - if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { - continue; - } - mTimeFrame->markUsedCluster(iLayer, track.getClusterIndex(iLayer)); - int currentROF = mTimeFrame->getClusterROF(iLayer, track.getClusterIndex(iLayer)); - auto rofTS = mTimeFrame->getROFOverlapTableView().getLayer(iLayer).getROFTimeBounds(currentROF, true); - if (firstCls) { - firstCls = false; - ts = rofTS; - } else { - if (!ts.isCompatible(rofTS)) { - LOGP(fatal, "TS {}+/-{} are incompatible with {}+/-{}, this should not happen!", rofTS.getTimeStamp(), rofTS.getTimeStampError(), ts.getTimeStamp(), ts.getTimeStampError()); - } - ts += rofTS; - } - } - track.getTimeStamp() = ts.makeSymmetrical(); - if (track.getTimeStamp().getTimeStampError() > smallestROFHalf) { - track.getTimeStamp().setTimeStampError(smallestROFHalf); - } - track.setUserField(0); - track.getParamOut().setUserField(0); - mTimeFrame->getTracks().emplace_back(track); - - firstClusters[firstLayer].push_back(firstCluster); - if (isFirstShared) { - sharedFirstClusters[firstLayer].push_back(firstCluster); + bool isShared = mTimeFrame->isClusterUsed(iLayer, track.getClusterIndex(iLayer)); + nShared += int(isShared); + if (firstLayer < 0) { + firstCluster = track.getClusterIndex(iLayer); + isFirstShared = isShared && mTrkParams[iteration].AllowSharingFirstCluster && std::find(firstClusters[iLayer].begin(), firstClusters[iLayer].end(), firstCluster) != firstClusters[iLayer].end(); + firstLayer = iLayer; } } - } - /// Now we have to set the shared cluster flag - for (int iLayer{0}; iLayer < mTrkParams[0].NLayers; ++iLayer) { - std::sort(sharedFirstClusters[iLayer].begin(), sharedFirstClusters[iLayer].end()); - } + /// do not account for the first cluster in the shared clusters number if it is allowed + if (nShared - int(isFirstShared && mTrkParams[iteration].AllowSharingFirstCluster) > mTrkParams[iteration].ClusterSharing) { + continue; + } - for (auto& track : mTimeFrame->getTracks()) { - int firstLayer{mTrkParams[0].NLayers}, firstCluster{constants::UnusedIndex}; - for (int iLayer{0}; iLayer < mTrkParams[0].NLayers; ++iLayer) { + bool firstCls{true}; + TimeEstBC ts; + for (int iLayer{0}; iLayer < mTrkParams[iteration].NLayers; ++iLayer) { if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { continue; } - firstLayer = iLayer; - firstCluster = track.getClusterIndex(iLayer); - break; + mTimeFrame->markUsedCluster(iLayer, track.getClusterIndex(iLayer)); + int currentROF = mTimeFrame->getClusterROF(iLayer, track.getClusterIndex(iLayer)); + auto rofTS = mTimeFrame->getROFOverlapTableView().getLayer(iLayer).getROFTimeBounds(currentROF, true); + if (firstCls) { + firstCls = false; + ts = rofTS; + } else { + if (!ts.isCompatible(rofTS)) { + LOGP(fatal, "TS {}+/-{} are incompatible with {}+/-{}, this should not happen!", rofTS.getTimeStamp(), rofTS.getTimeStampError(), ts.getTimeStamp(), ts.getTimeStampError()); + } + ts += rofTS; + } } - if (std::binary_search(sharedFirstClusters[firstLayer].begin(), sharedFirstClusters[firstLayer].end(), firstCluster)) { - track.setSharedClusters(); + track.getTimeStamp() = ts.makeSymmetrical(); + if (track.getTimeStamp().getTimeStampError() > smallestROFHalf) { + track.getTimeStamp().setTimeStampError(smallestROFHalf); } - } -} -template -bool TrackerTraits::fitTrack(TrackITSExt& track, int start, int end, int step, float chi2clcut, float chi2ndfcut, float maxQoverPt, int nCl, o2::track::TrackPar* linRef) -{ - auto propInstance = o2::base::Propagator::Instance(); + track.setUserField(0); + track.getParamOut().setUserField(0); + mTimeFrame->getTracks().emplace_back(track); - for (int iLayer{start}; iLayer != end; iLayer += step) { - if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { - continue; - } - const TrackingFrameInfo& trackingHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer)[track.getClusterIndex(iLayer)]; - if (linRef) { - if (!track.rotate(trackingHit.alphaTrackingFrame, *linRef, getBz())) { - return false; - } - if (!propInstance->propagateToX(track, *linRef, trackingHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl::MAX_SIN_PHI, o2::base::PropagatorImpl::MAX_STEP, mTrkParams[0].CorrType)) { - return false; - } - if (mTrkParams[0].CorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { - if (!track.correctForMaterial(*linRef, mTrkParams[0].LayerxX0[iLayer], mTrkParams[0].LayerxX0[iLayer] * constants::Radl * constants::Rho, true)) { - continue; - } - } - } else { - if (!track.rotate(trackingHit.alphaTrackingFrame)) { - return false; - } - if (!propInstance->propagateToX(track, trackingHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl::MAX_SIN_PHI, o2::base::PropagatorImpl::MAX_STEP, mTrkParams[0].CorrType)) { - return false; - } - if (mTrkParams[0].CorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { - if (!track.correctForMaterial(mTrkParams[0].LayerxX0[iLayer], mTrkParams[0].LayerxX0[iLayer] * constants::Radl * constants::Rho, true)) { - continue; - } + if (mTrkParams[iteration].AllowSharingFirstCluster) { + firstClusters[firstLayer].push_back(firstCluster); + if (isFirstShared) { + sharedFirstClusters[firstLayer].push_back(firstCluster); } } - auto predChi2{track.getPredictedChi2Quiet(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)}; - if ((nCl >= 3 && predChi2 > chi2clcut) || predChi2 < 0.f) { - return false; - } - track.setChi2(track.getChi2() + predChi2); - if (!track.o2::track::TrackParCov::update(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)) { - return false; - } - if (linRef && mTrkParams[0].ShiftRefToCluster) { // displace the reference to the last updated cluster - linRef->setY(trackingHit.positionTrackingFrame[0]); - linRef->setZ(trackingHit.positionTrackingFrame[1]); - } - nCl++; } - return std::abs(track.getQ2Pt()) < maxQoverPt && track.getChi2() < chi2ndfcut * (float)((nCl * 2) - 5); } -// create a new seed either from the existing track inner param or reseed from the edgepointd and cluster in the middle template -TrackITSExt TrackerTraits::seedTrackForRefit(const TrackSeedN& seed) +void TrackerTraits::markTracks(int iteration, bounded_vector>& sharedFirstClusters) { - TrackITSExt temporaryTrack(seed); - int lrMin = NLayers, lrMax = 0, lrMid = 0; - for (int iL = 0; iL < NLayers; ++iL) { - const int idx = seed.getCluster(iL); - temporaryTrack.setExternalClusterIndex(iL, idx, idx != constants::UnusedIndex); - if (idx != constants::UnusedIndex) { - lrMin = o2::gpu::CAMath::Min(lrMin, iL); - lrMax = o2::gpu::CAMath::Max(lrMax, iL); + if (mTrkParams[iteration].AllowSharingFirstCluster) { + /// Now we have to set the shared cluster flag + for (int iLayer{0}; iLayer < mTrkParams[iteration].NLayers; ++iLayer) { + std::sort(sharedFirstClusters[iLayer].begin(), sharedFirstClusters[iLayer].end()); } - } - int ncl = temporaryTrack.getNClusters(); - if (ncl < mTrkParams[0].ReseedIfShorter) { // reseed with circle passing via edges and the midpoint - if (ncl == mTrkParams[0].NLayers) { - lrMin = 0; - lrMax = mTrkParams[0].NLayers - 1; - lrMid = (lrMin + lrMax) / 2; - } else { - lrMid = lrMin + 1; - float midR = 0.5 * (mTrkParams[0].LayerRadii[lrMax] + mTrkParams[0].LayerRadii[lrMin]), dstMidR = o2::gpu::GPUCommonMath::Abs(midR - mTrkParams[0].LayerRadii[lrMid]); - for (int iL = lrMid + 1; iL < lrMax; ++iL) { // find the midpoint as closest to the midR - auto dst = o2::gpu::GPUCommonMath::Abs(midR - mTrkParams[0].LayerRadii[iL]); - if (dst < dstMidR) { - lrMid = iL; - dstMidR = dst; + + for (auto& track : mTimeFrame->getTracks()) { + int firstLayer{mTrkParams[iteration].NLayers}, firstCluster{constants::UnusedIndex}; + for (int iLayer{0}; iLayer < mTrkParams[iteration].NLayers; ++iLayer) { + if (track.getClusterIndex(iLayer) == constants::UnusedIndex) { + continue; } + firstLayer = iLayer; + firstCluster = track.getClusterIndex(iLayer); + break; + } + if (std::binary_search(sharedFirstClusters[firstLayer].begin(), sharedFirstClusters[firstLayer].end(), firstCluster)) { + track.setSharedClusters(); } } - const auto& cluster0_tf = mTimeFrame->getTrackingFrameInfoOnLayer(lrMin)[seed.getCluster(lrMin)]; // if the sensor frame! - const auto& cluster1_gl = mTimeFrame->getUnsortedClusters()[lrMid][seed.getCluster(lrMid)]; // global frame - const auto& cluster2_gl = mTimeFrame->getUnsortedClusters()[lrMax][seed.getCluster(lrMax)]; // global frame - temporaryTrack.getParamIn() = buildTrackSeed(cluster2_gl, cluster1_gl, cluster0_tf, true); } - temporaryTrack.resetCovariance(); - temporaryTrack.setCov(temporaryTrack.getQ2Pt() * temporaryTrack.getQ2Pt() * temporaryTrack.getCov()[o2::track::CovLabels::kSigQ2Pt2], o2::track::CovLabels::kSigQ2Pt2); - return temporaryTrack; -} - -/// Clusters are given from inside outward (cluster3 is the outermost). The outermost cluster is given in the tracking -/// frame coordinates whereas the others are referred to the global frame. -template -track::TrackParCov TrackerTraits::buildTrackSeed(const Cluster& cluster1, const Cluster& cluster2, const TrackingFrameInfo& tf3, bool reverse) -{ - const float sign = reverse ? -1.f : 1.f; - - float ca = NAN, sa = NAN; - o2::gpu::CAMath::SinCos(tf3.alphaTrackingFrame, sa, ca); - - const float x1 = (cluster1.xCoordinate * ca) + (cluster1.yCoordinate * sa); - const float y1 = (-cluster1.xCoordinate * sa) + (cluster1.yCoordinate * ca); - const float x2 = (cluster2.xCoordinate * ca) + (cluster2.yCoordinate * sa); - const float y2 = (-cluster2.xCoordinate * sa) + (cluster2.yCoordinate * ca); - const float x3 = tf3.xTrackingFrame; - const float y3 = tf3.positionTrackingFrame[0]; - - float snp = NAN, q2pt = NAN, q2pt2 = NAN; - if (mIsZeroField) { - const float dx = x3 - x1; - const float dy = y3 - y1; - snp = sign * dy / o2::gpu::CAMath::Hypot(dx, dy); - q2pt = 1.f / track::kMostProbablePt; - q2pt2 = 1.f; - } else { - const float crv = math_utils::computeCurvature(x3, y3, x2, y2, x1, y1); - snp = sign * crv * (x3 - math_utils::computeCurvatureCentreX(x3, y3, x2, y2, x1, y1)); - q2pt = sign * crv / (mBz * o2::constants::math::B2C); - q2pt2 = crv * crv; - } - - const float tgl = 0.5f * (math_utils::computeTanDipAngle(x1, y1, x2, y2, cluster1.zCoordinate, cluster2.zCoordinate) + - math_utils::computeTanDipAngle(x2, y2, x3, y3, cluster2.zCoordinate, tf3.positionTrackingFrame[1])); - const float sg2q2pt = track::kC1Pt2max * (q2pt2 > 0.0005f ? (q2pt2 < 1.f ? q2pt2 : 1.f) : 0.0005f); - - return {x3, tf3.alphaTrackingFrame, {y3, tf3.positionTrackingFrame[1], snp, tgl, q2pt}, {tf3.covarianceTrackingFrame[0], tf3.covarianceTrackingFrame[1], tf3.covarianceTrackingFrame[2], 0.f, 0.f, track::kCSnp2max, 0.f, 0.f, 0.f, track::kCTgl2max, 0.f, 0.f, 0.f, 0.f, sg2q2pt}}; } template void TrackerTraits::setBz(float bz) { mBz = bz; - mIsZeroField = std::abs(mBz) < 0.01; mTimeFrame->setBz(bz); } @@ -1013,13 +952,13 @@ void TrackerTraits::setNThreads(int n, std::shared_ptr } template class TrackerTraits<7>; -template void TrackerTraits<7>::processNeighbours(int, int, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&); -template void TrackerTraits<7>::processNeighbours>(int, int, const bounded_vector>&, const bounded_vector&, bounded_vector>&, bounded_vector&); +template void TrackerTraits<7>::processNeighbours(int, int, int, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&); +template void TrackerTraits<7>::processNeighbours>(int, int, int, const bounded_vector>&, const bounded_vector&, bounded_vector>&, bounded_vector&); // ALICE3 upgrade #ifdef ENABLE_UPGRADES template class TrackerTraits<11>; -template void TrackerTraits<11>::processNeighbours(int, int, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&); -template void TrackerTraits<11>::processNeighbours>(int, int, const bounded_vector>&, const bounded_vector&, bounded_vector>&, bounded_vector&); +template void TrackerTraits<11>::processNeighbours(int, int, int, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&); +template void TrackerTraits<11>::processNeighbours>(int, int, int, const bounded_vector>&, const bounded_vector&, bounded_vector>&, bounded_vector&); #endif } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 427831b1ed484..5e6a6ae81c90a 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -192,7 +192,6 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) if (mRunVertexer) { // Run seeding vertexer vertexerElapsedTime = mVertexer->clustersToVertices(logger); - // FIXME: this is a temporary stop-gap measure until we figure the rest out const auto& vtx = mTimeFrame->getPrimaryVertices(); vertices.insert(vertices.begin(), vtx.begin(), vtx.end()); if (mIsMC) {