Skip to content

Commit 05ede90

Browse files
committed
ITS: avoid memory shuffeling by accessing tracks via idx
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 6d61b61 commit 05ede90

7 files changed

Lines changed: 43 additions & 11 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
9393
void createTrackITSExtDevice(const size_t);
9494
void createTrackExtensionScratchDevice(const int nThreads, const int maxHypotheses);
9595
void downloadTrackITSExtDevice();
96+
void downloadTrackIndicesDevice();
9697
void downloadCellsNeighboursDevice(std::vector<bounded_vector<CellNeighbour>>&, const int);
9798
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
9899
void downloadCellsDevice();
@@ -120,13 +121,15 @@ class TimeFrameGPU : public TimeFrame<NLayers>
120121
const auto getDeviceTrackingTopologyView() const { return mDeviceTrackingTopologyView; }
121122
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
122123
auto& getTrackITSExt() { return mTrackITSExt; }
124+
auto& getTrackIndices() { return mTrackIndices; }
123125
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
124126
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
125127
unsigned char* getDeviceUsedClusters(const int);
126128
const o2::base::Propagator* getChainPropagator();
127129

128130
// Hybrid
129131
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
132+
int* getDeviceTrackIndices() { return mTrackIndicesDevice; }
130133
TrackExtensionHypothesis<NLayers>* getDeviceActiveTrackExtensionHypotheses() { return mActiveTrackExtensionHypothesesDevice; }
131134
TrackExtensionHypothesis<NLayers>* getDeviceNextTrackExtensionHypotheses() { return mNextTrackExtensionHypothesesDevice; }
132135
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
@@ -226,6 +229,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
226229
float** mCellSeedsChi2DeviceArray;
227230

228231
TrackITSExt* mTrackITSExtDevice;
232+
int* mTrackIndicesDevice{nullptr};
229233
TrackExtensionHypothesis<NLayers>* mActiveTrackExtensionHypothesesDevice{nullptr};
230234
TrackExtensionHypothesis<NLayers>* mNextTrackExtensionHypothesesDevice{nullptr};
231235
std::array<CellNeighbour*, MaxCells> mNeighboursDevice{};
@@ -244,6 +248,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
244248

245249
// Temporary buffer for storing output tracks from GPU tracking
246250
bounded_vector<TrackITSExt> mTrackITSExt;
251+
bounded_vector<int> mTrackIndices;
247252
};
248253

249254
template <int NLayers>

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ void computeTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
232232
const int** clustersIndexTables,
233233
const int** ROFClusters,
234234
o2::its::TrackITSExt* tracks,
235+
int* trackIndices,
235236
const int* seedLUT,
236237
TrackExtensionHypothesis<NLayers>* activeHypotheses,
237238
TrackExtensionHypothesis<NLayers>* nextHypotheses,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,11 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds)
578578
GPUChkErrS(cudaMemcpy(&mNTracks, mTrackSeedsLUTDevice + nSeeds, sizeof(int), cudaMemcpyDeviceToHost));
579579
GPULog("gpu-allocation: reserving {} tracks, for {:.2f} MB.", mNTracks, mNTracks * sizeof(o2::its::TrackITSExt) / constants::MB);
580580
mTrackITSExt = bounded_vector<TrackITSExt>(mNTracks, {}, this->getMemoryPool().get());
581+
mTrackIndices = bounded_vector<int>(mNTracks, 0, this->getMemoryPool().get());
581582
allocMem(reinterpret_cast<void**>(&mTrackITSExtDevice), mNTracks * sizeof(o2::its::TrackITSExt), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
582583
GPUChkErrS(cudaMemset(mTrackITSExtDevice, 0, mNTracks * sizeof(o2::its::TrackITSExt)));
584+
GPULog("gpu-allocation: reserving {} track indices, for {:.2f} MB.", mNTracks, mNTracks * sizeof(int) / constants::MB);
585+
allocMem(reinterpret_cast<void**>(&mTrackIndicesDevice), mNTracks * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
583586
}
584587

585588
template <int NLayers>
@@ -643,6 +646,14 @@ void TimeFrameGPU<NLayers>::downloadTrackITSExtDevice()
643646
GPUChkErrS(cudaMemcpy(mTrackITSExt.data(), mTrackITSExtDevice, mTrackITSExt.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost));
644647
}
645648

649+
template <int NLayers>
650+
void TimeFrameGPU<NLayers>::downloadTrackIndicesDevice()
651+
{
652+
GPUTimer timer("downloading track indices");
653+
GPULog("gpu-transfer: downloading {} track indices, for {:.2f} MB.", mTrackIndices.size(), mTrackIndices.size() * sizeof(int) / constants::MB);
654+
GPUChkErrS(cudaMemcpy(mTrackIndices.data(), mTrackIndicesDevice, mTrackIndices.size() * sizeof(int), cudaMemcpyDeviceToHost));
655+
}
656+
646657
template <int NLayers>
647658
void TimeFrameGPU<NLayers>::unregisterHostMemory(const int maxLayers)
648659
{

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ void TrackerTraitsGPU<NLayers>::findRoads(const int iteration)
380380
mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
381381
mTimeFrameGPU->getDeviceROFrameClusters(),
382382
mTimeFrameGPU->getDeviceTrackITSExt(),
383+
mTimeFrameGPU->getDeviceTrackIndices(),
383384
mTimeFrameGPU->getDeviceTrackSeedsLUT(),
384385
extendTracks ? mTimeFrameGPU->getDeviceActiveTrackExtensionHypotheses() : nullptr,
385386
extendTracks ? mTimeFrameGPU->getDeviceNextTrackExtensionHypotheses() : nullptr,
@@ -405,9 +406,11 @@ void TrackerTraitsGPU<NLayers>::findRoads(const int iteration)
405406
this->mTrkParams[iteration].CorrType,
406407
mTimeFrameGPU->getFrameworkAllocator());
407408
mTimeFrameGPU->downloadTrackITSExtDevice();
409+
mTimeFrameGPU->downloadTrackIndicesDevice();
408410

409411
auto& tracks = mTimeFrameGPU->getTrackITSExt();
410-
this->acceptTracks(iteration, tracks, firstClusters);
412+
const auto& trackIndices = mTimeFrameGPU->getTrackIndices();
413+
this->acceptTracks(iteration, tracks, trackIndices, firstClusters);
411414
mTimeFrameGPU->loadUsedClustersDevice();
412415
}
413416
this->markTracks(iteration);

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <thrust/execution_policy.h>
1818
#include <thrust/device_ptr.h>
1919
#include <thrust/device_vector.h>
20+
#include <thrust/sequence.h>
2021
#include <thrust/sort.h>
2122
#include <thrust/reduce.h>
2223
#include <thrust/functional.h>
@@ -86,10 +87,12 @@ struct is_valid_pair {
8687
}
8788
};
8889

89-
struct compare_track_chi2 {
90-
GPUhd() bool operator()(const TrackITSExt& a, const TrackITSExt& b) const
90+
struct compare_track_index_chi2 {
91+
const TrackITSExt* tracks;
92+
93+
GPUhd() bool operator()(const int a, const int b) const
9194
{
92-
return o2::its::track::isBetter(a, b);
95+
return o2::its::track::isBetter(tracks[a], tracks[b]);
9396
}
9497
};
9598

@@ -1159,6 +1162,7 @@ void computeTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
11591162
const int** clustersIndexTables,
11601163
const int** ROFClusters,
11611164
o2::its::TrackITSExt* tracks,
1165+
int* trackIndices,
11621166
const int* seedLUT,
11631167
TrackExtensionHypothesis<NLayers>* activeHypotheses,
11641168
TrackExtensionHypothesis<NLayers>* nextHypotheses,
@@ -1222,8 +1226,9 @@ void computeTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
12221226
propagator, // const o2::base::Propagator*
12231227
matCorrType); // o2::base::PropagatorF::MatCorrType
12241228
auto sync_policy = THRUST_NAMESPACE::par(gpu::TypedAllocator<char>(alloc));
1225-
thrust::device_ptr<o2::its::TrackITSExt> tr_ptr(tracks);
1226-
thrust::sort(sync_policy, tr_ptr, tr_ptr + nTracks, gpu::compare_track_chi2());
1229+
thrust::device_ptr<int> trackIndicesPtr(trackIndices);
1230+
thrust::sequence(sync_policy, trackIndicesPtr, trackIndicesPtr + nTracks);
1231+
thrust::sort(sync_policy, trackIndicesPtr, trackIndicesPtr + nTracks, gpu::compare_track_index_chi2{tracks});
12271232
}
12281233

12291234
/// Explicit instantiation of ITS2 handlers
@@ -1401,6 +1406,7 @@ template void computeTrackSeedHandler(TrackSeed<7>* trackSeeds,
14011406
const int** clustersIndexTables,
14021407
const int** ROFClusters,
14031408
o2::its::TrackITSExt* tracks,
1409+
int* trackIndices,
14041410
const int* seedLUT,
14051411
TrackExtensionHypothesis<7>* activeHypotheses,
14061412
TrackExtensionHypothesis<7>* nextHypotheses,
@@ -1602,6 +1608,7 @@ template void computeTrackSeedHandler(TrackSeed<11>* trackSeeds,
16021608
const int** clustersIndexTables,
16031609
const int** ROFClusters,
16041610
o2::its::TrackITSExt* tracks,
1611+
int* trackIndices,
16051612
const int* seedLUT,
16061613
TrackExtensionHypothesis<11>* activeHypotheses,
16071614
TrackExtensionHypothesis<11>* nextHypotheses,

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class TrackerTraits
5959
template <typename InputSeed>
6060
void processNeighbours(int iteration, int defaultCellTopologyId, int iLevel, const bounded_vector<InputSeed>& currentCellSeed, const bounded_vector<int>& currentCellId, const bounded_vector<int>& currentCellTopologyId, bounded_vector<TrackSeedN>& updatedCellSeed, bounded_vector<int>& updatedCellId, bounded_vector<int>& updatedCellTopologyId);
6161

62-
void acceptTracks(int iteration, bounded_vector<TrackITSExt>& tracks, bounded_vector<bounded_vector<int>>& firstClusters);
62+
void acceptTracks(int iteration, bounded_vector<TrackITSExt>& tracks, const bounded_vector<int>& trackIndices, bounded_vector<bounded_vector<int>>& firstClusters);
6363
void markTracks(int iteration);
6464

6565
void updateTrackingParameters(const std::vector<TrackingParameters>& trkPars)

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -846,24 +846,29 @@ void TrackerTraits<NLayers>::findRoads(const int iteration)
846846
deepVectorClear(trackSeeds);
847847
});
848848

849-
std::sort(tracks.begin(), tracks.end(), [](const auto& a, const auto& b) {
850-
return track::isBetter(a, b);
849+
// Sort tracks via indices to avoid moving TrackITSExt objects.
850+
bounded_vector<int> trackIndices(tracks.size(), mMemoryPool.get());
851+
std::iota(trackIndices.begin(), trackIndices.end(), 0);
852+
std::sort(trackIndices.begin(), trackIndices.end(), [&tracks](int a, int b) {
853+
return track::isBetter(tracks[a], tracks[b]);
851854
});
852855

853-
acceptTracks(iteration, tracks, firstClusters);
856+
acceptTracks(iteration, tracks, trackIndices, firstClusters);
854857
}
855858
markTracks(iteration);
856859
}
857860

858861
template <int NLayers>
859862
void TrackerTraits<NLayers>::acceptTracks(int iteration,
860863
bounded_vector<TrackITSExt>& tracks,
864+
const bounded_vector<int>& trackIndices,
861865
bounded_vector<bounded_vector<int>>& firstClusters)
862866
{
863867
auto& trks = mTimeFrame->getTracks();
864868
trks.reserve(trks.size() + tracks.size());
865869
const float smallestROFHalf = mTimeFrame->getROFOverlapTableView().getClockLayer().mROFLength * 0.5f;
866-
for (auto& track : tracks) {
870+
for (size_t trackId{0}; trackId < trackIndices.size(); ++trackId) {
871+
auto& track = tracks[trackIndices[trackId]];
867872
int nShared = 0;
868873
bool isFirstShared{false};
869874
int firstLayer{-1}, firstCluster{-1};

0 commit comments

Comments
 (0)