From ef13cf45ba3b4661cd5d3e031c8446e00d6cbe7c Mon Sep 17 00:00:00 2001 From: facebook-github-bot Date: Mon, 21 Oct 2024 01:11:26 +0000 Subject: [PATCH] Update API docs (fd6d7841). --- xml/FlatIndex_8cuh.xml | 1 + xml/GpuIndexCagra_8h.xml | 565 ++++++++-------- xml/GpuIndex_8h.xml | 395 +++++------ xml/GpuResources_8h.xml | 611 +++++++++--------- xml/RaftCagra_8cuh.xml | 1 + xml/RaftFlatIndex_8cuh.xml | 1 + xml/RaftIVFFlat_8cuh.xml | 1 + xml/RaftIVFPQ_8cuh.xml | 1 + xml/RaftUtils_8h.xml | 151 ++--- xml/StandardGpuResources_8h.xml | 519 +++++++-------- xml/classfaiss_1_1gpu_1_1GpuIndex.xml | 50 +- xml/classfaiss_1_1gpu_1_1GpuIndexFlat.xml | 26 +- xml/classfaiss_1_1gpu_1_1GpuIndexFlatIP.xml | 26 +- xml/classfaiss_1_1gpu_1_1GpuIndexFlatL2.xml | 26 +- xml/classfaiss_1_1gpu_1_1GpuIndexIVF.xml | 32 +- xml/classfaiss_1_1gpu_1_1GpuIndexIVFFlat.xml | 32 +- xml/classfaiss_1_1gpu_1_1GpuIndexIVFPQ.xml | 32 +- ...s_1_1gpu_1_1GpuIndexIVFScalarQuantizer.xml | 32 +- xml/classfaiss_1_1gpu_1_1GpuResources.xml | 40 +- ...ssfaiss_1_1gpu_1_1GpuResourcesProvider.xml | 6 +- ...pu_1_1GpuResourcesProviderFromInstance.xml | 10 +- ...ssfaiss_1_1gpu_1_1StandardGpuResources.xml | 32 +- ...iss_1_1gpu_1_1StandardGpuResourcesImpl.xml | 84 +-- xml/namespacefaiss_1_1gpu.xml | 36 +- xml/structfaiss_1_1gpu_1_1AllocInfo.xml | 16 +- xml/structfaiss_1_1gpu_1_1AllocRequest.xml | 20 +- ...iss_1_1gpu_1_1GpuIndexBinaryFlatConfig.xml | 6 +- xml/structfaiss_1_1gpu_1_1GpuIndexCagra.xml | 56 +- ...uctfaiss_1_1gpu_1_1GpuIndexCagraConfig.xml | 20 +- xml/structfaiss_1_1gpu_1_1GpuIndexConfig.xml | 8 +- ...ructfaiss_1_1gpu_1_1GpuIndexFlatConfig.xml | 6 +- ...tructfaiss_1_1gpu_1_1GpuIndexIVFConfig.xml | 6 +- ...tfaiss_1_1gpu_1_1GpuIndexIVFFlatConfig.xml | 6 +- ...uctfaiss_1_1gpu_1_1GpuIndexIVFPQConfig.xml | 6 +- ...pu_1_1GpuIndexIVFScalarQuantizerConfig.xml | 6 +- ...ctfaiss_1_1gpu_1_1GpuMemoryReservation.xml | 26 +- ...tfaiss_1_1gpu_1_1IVFPQBuildCagraConfig.xml | 18 +- ...faiss_1_1gpu_1_1IVFPQSearchCagraConfig.xml | 10 +- ...tfaiss_1_1gpu_1_1SearchParametersCagra.xml | 28 +- 39 files changed, 1479 insertions(+), 1469 deletions(-) diff --git a/xml/FlatIndex_8cuh.xml b/xml/FlatIndex_8cuh.xml index 7c7bbc30b0..d122866843 100644 --- a/xml/FlatIndex_8cuh.xml +++ b/xml/FlatIndex_8cuh.xml @@ -7,6 +7,7 @@ +//@lint-ignore-everyLICENSELINT /** *Copyright(c)Facebook,Inc.anditsaffiliates. * diff --git a/xml/GpuIndexCagra_8h.xml b/xml/GpuIndexCagra_8h.xml index ff2581f05c..69635604da 100644 --- a/xml/GpuIndexCagra_8h.xml +++ b/xml/GpuIndexCagra_8h.xml @@ -327,288 +327,289 @@ -/** -*Copyright(c)Facebook,Inc.anditsaffiliates. -* -*ThissourcecodeislicensedundertheMITlicensefoundinthe -*LICENSEfileintherootdirectoryofthissourcetree. -*/ -/* -*Copyright(c)2024,NVIDIACORPORATION. -* -*LicensedundertheApacheLicense,Version2.0(the"License"); -*youmaynotusethisfileexceptincompliancewiththeLicense. -*YoumayobtainacopyoftheLicenseat -* -*http://www.apache.org/licenses/LICENSE-2.0 -* -*Unlessrequiredbyapplicablelaworagreedtoinwriting,software -*distributedundertheLicenseisdistributedonan"ASIS"BASIS, -*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. -*SeetheLicenseforthespecificlanguagegoverningpermissionsand -*limitationsundertheLicense. -*/ - -#pragmaonce - -#include<faiss/IndexIVF.h> -#include<faiss/gpu/GpuIndex.h> -#include<faiss/gpu/GpuIndexIVFPQ.h> - -namespacefaiss{ -structIndexHNSWCagra; -} - -namespacefaiss{ -namespacegpu{ - -classRaftCagra; - -enumclassgraph_build_algo{ -///UseIVF-PQtobuildall-neighborsknngraph -IVF_PQ, -///Experimental,useNN-Descenttobuildall-neighborsknngraph -NN_DESCENT -}; - -///AtypeforspecifyinghowPQcodebooksarecreated. -enumclasscodebook_gen{//NOLINT -PER_SUBSPACE=0,//NOLINT -PER_CLUSTER=1,//NOLINT -}; - -structIVFPQBuildCagraConfig{ -/// -///Thenumberofinvertedlists(clusters) -/// -///Hint:thenumberofvectorspercluster(`n_rows/n_lists`)shouldbe -///approximately1,000to10,000. - -uint32_tn_lists=1024; -///Thenumberofiterationssearchingforkmeanscenters(indexbuilding). -uint32_tkmeans_n_iters=20; -///Thefractionofdatatouseduringiterativekmeansbuilding. -doublekmeans_trainset_fraction=0.5; -/// -///ThebitlengthofthevectorelementaftercompressionbyPQ. -/// -///Possiblevalues:[4,5,6,7,8]. -/// -///Hint:thesmallerthe'pq_bits',thesmallertheindexsizeandthe -///betterthesearchperformance,butthelowertherecall. - -uint32_tpq_bits=8; -/// -///ThedimensionalityofthevectoraftercompressionbyPQ.Whenzero,an -///optimalvalueisselectedusingaheuristic. -/// -///NB:`pq_dim///pq_bits`mustbeamultipleof8. -/// -///Hint:asmaller'pq_dim'resultsinasmallerindexsizeandbetter -///searchperformance,butlowerrecall.If'pq_bits'is8,'pq_dim'canbe -///settoanynumber,butmultipleof8aredesirableforgoodperformance. -///If'pq_bits'isnot8,'pq_dim'shouldbeamultipleof8.Forgood -///performance,itisdesirablethat'pq_dim'isamultipleof32.Ideally, -///'pq_dim'shouldbealsoadivisorofthedatasetdim. - -uint32_tpq_dim=0; -///HowPQcodebooksarecreated. -codebook_gencodebook_kind=codebook_gen::PER_SUBSPACE; -/// -///Applyarandomrotationmatrixontheinputdataandqueriesevenif -///`dim%pq_dim==0`. -/// -///Note:if`dim`isnotmultipleof`pq_dim`,arandomrotationisalways -///appliedtotheinputdataandqueriestotransformtheworkingspace -///from`dim`to`rot_dim`,whichmaybeslightlylargerthantheoriginal -///spaceandandisamultipleof`pq_dim`(`rot_dim%pq_dim==0`). -///However,thistransformisnotnecessarywhen`dim`ismultipleof -///`pq_dim` -///(`dim==rot_dim`,hencenoneedinadding"extra"datacolumns/ -///features). -/// -///Bydefault,if`dim==rot_dim`,therotationtransformisinitialized -///withtheidentitymatrix.When`force_random_rotation==true`,arandom -///orthogonaltransformmatrixisgeneratedregardlessofthevaluesof -///`dim`and`pq_dim`. - -boolforce_random_rotation=false; -/// -///Bydefault,thealgorithmallocatesmorespacethannecessaryfor -///individualclusters -///(`list_data`).Thisallowstoamortizethecostofmemoryallocationand -///reducethenumberofdatacopiesduringrepeatedcallsto`extend` -///(extendingthedatabase). -/// -///Thealternativeistheconservativeallocationbehavior;whenenabled, -///thealgorithmalwaysallocatestheminimumamountofmemoryrequiredto -///storethegivennumberofrecords.Setthisflagto`true`ifyouprefer -///touseaslittleGPUmemoryforthedatabaseaspossible. - -boolconservative_memory_allocation=false; -}; - -structIVFPQSearchCagraConfig{ -///Thenumberofclusterstosearch. -uint32_tn_probes=20; -/// -///Datatypeoflookuptabletobecreateddynamicallyatsearchtime. -/// -///Possiblevalues:[CUDA_R_32F,CUDA_R_16F,CUDA_R_8U] -/// -///Theuseoflow-precisiontypesreducestheamountofsharedmemory -///requiredatsearchtime,sofastsharedmemorykernelscanbeusedeven -///fordatasetswithlargedimansionality.Notethattherecallisslightly -///degradedwhenlow-precisiontypeisselected. - -cudaDataType_tlut_dtype=CUDA_R_32F; -/// -///Storagedatatypefordistance/similaritycomputedatsearchtime. -/// -///Possiblevalues:[CUDA_R_16F,CUDA_R_32F] -/// -///Iftheperformancelimiteratsearchtimeisdevicememoryaccess, -///selectingFP16willimproveperformanceslightly. - -cudaDataType_tinternal_distance_dtype=CUDA_R_32F; -/// -///PreferredfractionofSM'sunifiedmemory/L1cachetobeusedas -///sharedmemory. -/// -///Possiblevalues:[0.0-1.0]asafractionofthe -///`sharedMemPerMultiprocessor`. -/// -///OnewantstoincreasethecarveouttomakesureagoodGPUoccupancyfor -///themainsearchkernel,butnottokeepittoohightoleavesomememory -///tobeusedasL1cache.Note,thisvalueisinterpretedonlyasahint. -///Moreover,aGPUusuallyallowsonlyafixedsetofcacheconfigurations, -///sotheprovidedvalueisroundeduptothenearestconfiguration.Refer -///totheNVIDIAtuningguideforthetargetGPUarchitecture. -/// -///Note,thisisalow-leveltuningparameterthatcanhavedrastic -///negativeeffectsonthesearchperformanceiftweakedincorrectly. - -doublepreferred_shmem_carveout=1.0; -}; - -structGpuIndexCagraConfig:publicGpuIndexConfig{ -///Degreeofinputgraphforpruning. -size_tintermediate_graph_degree=128; -///Degreeofoutputgraph. -size_tgraph_degree=64; -///ANNalgorithmtobuildknngraph. -graph_build_algobuild_algo=graph_build_algo::IVF_PQ; -///NumberofIterationstorunifbuildingwithNN_DESCENT -size_tnn_descent_niter=20; - -IVFPQBuildCagraConfig*ivf_pq_params=nullptr; -IVFPQSearchCagraConfig*ivf_pq_search_params=nullptr; -}; - -enumclasssearch_algo{ -///Forlargebatchsizes. -SINGLE_CTA, -///Forsmallbatchsizes. -MULTI_CTA, -MULTI_KERNEL, -AUTO -}; - -enumclasshash_mode{HASH,SMALL,AUTO}; - -structSearchParametersCagra:SearchParameters{ -///Maximumnumberofqueriestosearchatthesametime(batchsize).Auto -///selectwhen0. -size_tmax_queries=0; - -///Numberofintermediatesearchresultsretainedduringthesearch. -/// -///Thisisthemainknobtoadjusttradeoffbetweenaccuracyandsearch -///speed.Highervaluesimprovethesearchaccuracy. - -size_titopk_size=64; - -///Upperlimitofsearchiterations.Autoselectwhen0. -size_tmax_iterations=0; - -//Inthefollowingwelistadditionalsearchparametersforfinetuning. -//Reasonabledefaultvaluesareautomaticallychosen. - -///Whichsearchimplementationtouse. -search_algoalgo=search_algo::AUTO; - -///Numberofthreadsusedtocalculateasingledistance.4,8,16,or32. - -size_tteam_size=0; - -///Numberofgraphnodestoselectasthestartingpointforthesearchin -///eachiteration.akasearchwidth? -size_tsearch_width=1; -///Lowerlimitofsearchiterations. -size_tmin_iterations=0; - -///Threadblocksize.0,64,128,256,512,1024.Autoselectionwhen0. -size_tthread_block_size=0; -///Hashmaptype.AutoselectionwhenAUTO. -hash_modehashmap_mode=hash_mode::AUTO; -///Lowerlimitofhashmapbitlength.Morethan8. -size_thashmap_min_bitlen=0; -///Upperlimitofhashmapfillrate.Morethan0.1,lessthan0.9. -floathashmap_max_fill_rate=0.5; - -///Numberofiterationsofinitialrandomseednodeselection.1ormore. - -uint32_tnum_random_samplings=1; -///Bitmaskusedforinitialrandomseednodeselection. -uint64_tseed=0x128394; -}; - -structGpuIndexCagra:publicGpuIndex{ -public: -GpuIndexCagra( -GpuResourcesProvider*provider, -intdims, -faiss::MetricTypemetric=faiss::METRIC_L2, -GpuIndexCagraConfigconfig=GpuIndexCagraConfig()); - -///TrainsCAGRAbasedonthegivenvectordata -voidtrain(idx_tn,constfloat*x)override; - -///InitializeourselvesfromthegivenCPUindex;willoverwrite -///alldatainourselves -voidcopyFrom(constfaiss::IndexHNSWCagra*index); - -///CopyourselvestothegivenCPUindex;willoverwritealldata -///intheindexinstance -voidcopyTo(faiss::IndexHNSWCagra*index)const; - -voidreset()override; - -std::vector<idx_t>get_knngraph()const; - -protected: -booladdImplRequiresIDs_()constoverride; - -voidaddImpl_(idx_tn,constfloat*x,constidx_t*ids)override; - -///CalledfromGpuIndexforsearch -voidsearchImpl_( -idx_tn, -constfloat*x, -intk, -float*distances, -idx_t*labels, -constSearchParameters*search_params)constoverride; - -///Ourconfigurationoptions -constGpuIndexCagraConfigcagraConfig_; - -///Instancethatweown;containstheinvertedlists -std::shared_ptr<RaftCagra>index_; -}; - -}//namespacegpu -}//namespacefaiss +//@lint-ignore-everyLICENSELINT +/** +*Copyright(c)Facebook,Inc.anditsaffiliates. +* +*ThissourcecodeislicensedundertheMITlicensefoundinthe +*LICENSEfileintherootdirectoryofthissourcetree. +*/ +/* +*Copyright(c)2024,NVIDIACORPORATION. +* +*LicensedundertheApacheLicense,Version2.0(the"License"); +*youmaynotusethisfileexceptincompliancewiththeLicense. +*YoumayobtainacopyoftheLicenseat +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +*Unlessrequiredbyapplicablelaworagreedtoinwriting,software +*distributedundertheLicenseisdistributedonan"ASIS"BASIS, +*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. +*SeetheLicenseforthespecificlanguagegoverningpermissionsand +*limitationsundertheLicense. +*/ + +#pragmaonce + +#include<faiss/IndexIVF.h> +#include<faiss/gpu/GpuIndex.h> +#include<faiss/gpu/GpuIndexIVFPQ.h> + +namespacefaiss{ +structIndexHNSWCagra; +} + +namespacefaiss{ +namespacegpu{ + +classRaftCagra; + +enumclassgraph_build_algo{ +///UseIVF-PQtobuildall-neighborsknngraph +IVF_PQ, +///Experimental,useNN-Descenttobuildall-neighborsknngraph +NN_DESCENT +}; + +///AtypeforspecifyinghowPQcodebooksarecreated. +enumclasscodebook_gen{//NOLINT +PER_SUBSPACE=0,//NOLINT +PER_CLUSTER=1,//NOLINT +}; + +structIVFPQBuildCagraConfig{ +/// +///Thenumberofinvertedlists(clusters) +/// +///Hint:thenumberofvectorspercluster(`n_rows/n_lists`)shouldbe +///approximately1,000to10,000. + +uint32_tn_lists=1024; +///Thenumberofiterationssearchingforkmeanscenters(indexbuilding). +uint32_tkmeans_n_iters=20; +///Thefractionofdatatouseduringiterativekmeansbuilding. +doublekmeans_trainset_fraction=0.5; +/// +///ThebitlengthofthevectorelementaftercompressionbyPQ. +/// +///Possiblevalues:[4,5,6,7,8]. +/// +///Hint:thesmallerthe'pq_bits',thesmallertheindexsizeandthe +///betterthesearchperformance,butthelowertherecall. + +uint32_tpq_bits=8; +/// +///ThedimensionalityofthevectoraftercompressionbyPQ.Whenzero,an +///optimalvalueisselectedusingaheuristic. +/// +///NB:`pq_dim///pq_bits`mustbeamultipleof8. +/// +///Hint:asmaller'pq_dim'resultsinasmallerindexsizeandbetter +///searchperformance,butlowerrecall.If'pq_bits'is8,'pq_dim'canbe +///settoanynumber,butmultipleof8aredesirableforgoodperformance. +///If'pq_bits'isnot8,'pq_dim'shouldbeamultipleof8.Forgood +///performance,itisdesirablethat'pq_dim'isamultipleof32.Ideally, +///'pq_dim'shouldbealsoadivisorofthedatasetdim. + +uint32_tpq_dim=0; +///HowPQcodebooksarecreated. +codebook_gencodebook_kind=codebook_gen::PER_SUBSPACE; +/// +///Applyarandomrotationmatrixontheinputdataandqueriesevenif +///`dim%pq_dim==0`. +/// +///Note:if`dim`isnotmultipleof`pq_dim`,arandomrotationisalways +///appliedtotheinputdataandqueriestotransformtheworkingspace +///from`dim`to`rot_dim`,whichmaybeslightlylargerthantheoriginal +///spaceandandisamultipleof`pq_dim`(`rot_dim%pq_dim==0`). +///However,thistransformisnotnecessarywhen`dim`ismultipleof +///`pq_dim` +///(`dim==rot_dim`,hencenoneedinadding"extra"datacolumns/ +///features). +/// +///Bydefault,if`dim==rot_dim`,therotationtransformisinitialized +///withtheidentitymatrix.When`force_random_rotation==true`,arandom +///orthogonaltransformmatrixisgeneratedregardlessofthevaluesof +///`dim`and`pq_dim`. + +boolforce_random_rotation=false; +/// +///Bydefault,thealgorithmallocatesmorespacethannecessaryfor +///individualclusters +///(`list_data`).Thisallowstoamortizethecostofmemoryallocationand +///reducethenumberofdatacopiesduringrepeatedcallsto`extend` +///(extendingthedatabase). +/// +///Thealternativeistheconservativeallocationbehavior;whenenabled, +///thealgorithmalwaysallocatestheminimumamountofmemoryrequiredto +///storethegivennumberofrecords.Setthisflagto`true`ifyouprefer +///touseaslittleGPUmemoryforthedatabaseaspossible. + +boolconservative_memory_allocation=false; +}; + +structIVFPQSearchCagraConfig{ +///Thenumberofclusterstosearch. +uint32_tn_probes=20; +/// +///Datatypeoflookuptabletobecreateddynamicallyatsearchtime. +/// +///Possiblevalues:[CUDA_R_32F,CUDA_R_16F,CUDA_R_8U] +/// +///Theuseoflow-precisiontypesreducestheamountofsharedmemory +///requiredatsearchtime,sofastsharedmemorykernelscanbeusedeven +///fordatasetswithlargedimansionality.Notethattherecallisslightly +///degradedwhenlow-precisiontypeisselected. + +cudaDataType_tlut_dtype=CUDA_R_32F; +/// +///Storagedatatypefordistance/similaritycomputedatsearchtime. +/// +///Possiblevalues:[CUDA_R_16F,CUDA_R_32F] +/// +///Iftheperformancelimiteratsearchtimeisdevicememoryaccess, +///selectingFP16willimproveperformanceslightly. + +cudaDataType_tinternal_distance_dtype=CUDA_R_32F; +/// +///PreferredfractionofSM'sunifiedmemory/L1cachetobeusedas +///sharedmemory. +/// +///Possiblevalues:[0.0-1.0]asafractionofthe +///`sharedMemPerMultiprocessor`. +/// +///OnewantstoincreasethecarveouttomakesureagoodGPUoccupancyfor +///themainsearchkernel,butnottokeepittoohightoleavesomememory +///tobeusedasL1cache.Note,thisvalueisinterpretedonlyasahint. +///Moreover,aGPUusuallyallowsonlyafixedsetofcacheconfigurations, +///sotheprovidedvalueisroundeduptothenearestconfiguration.Refer +///totheNVIDIAtuningguideforthetargetGPUarchitecture. +/// +///Note,thisisalow-leveltuningparameterthatcanhavedrastic +///negativeeffectsonthesearchperformanceiftweakedincorrectly. + +doublepreferred_shmem_carveout=1.0; +}; + +structGpuIndexCagraConfig:publicGpuIndexConfig{ +///Degreeofinputgraphforpruning. +size_tintermediate_graph_degree=128; +///Degreeofoutputgraph. +size_tgraph_degree=64; +///ANNalgorithmtobuildknngraph. +graph_build_algobuild_algo=graph_build_algo::IVF_PQ; +///NumberofIterationstorunifbuildingwithNN_DESCENT +size_tnn_descent_niter=20; + +IVFPQBuildCagraConfig*ivf_pq_params=nullptr; +IVFPQSearchCagraConfig*ivf_pq_search_params=nullptr; +}; + +enumclasssearch_algo{ +///Forlargebatchsizes. +SINGLE_CTA, +///Forsmallbatchsizes. +MULTI_CTA, +MULTI_KERNEL, +AUTO +}; + +enumclasshash_mode{HASH,SMALL,AUTO}; + +structSearchParametersCagra:SearchParameters{ +///Maximumnumberofqueriestosearchatthesametime(batchsize).Auto +///selectwhen0. +size_tmax_queries=0; + +///Numberofintermediatesearchresultsretainedduringthesearch. +/// +///Thisisthemainknobtoadjusttradeoffbetweenaccuracyandsearch +///speed.Highervaluesimprovethesearchaccuracy. + +size_titopk_size=64; + +///Upperlimitofsearchiterations.Autoselectwhen0. +size_tmax_iterations=0; + +//Inthefollowingwelistadditionalsearchparametersforfinetuning. +//Reasonabledefaultvaluesareautomaticallychosen. + +///Whichsearchimplementationtouse. +search_algoalgo=search_algo::AUTO; + +///Numberofthreadsusedtocalculateasingledistance.4,8,16,or32. + +size_tteam_size=0; + +///Numberofgraphnodestoselectasthestartingpointforthesearchin +///eachiteration.akasearchwidth? +size_tsearch_width=1; +///Lowerlimitofsearchiterations. +size_tmin_iterations=0; + +///Threadblocksize.0,64,128,256,512,1024.Autoselectionwhen0. +size_tthread_block_size=0; +///Hashmaptype.AutoselectionwhenAUTO. +hash_modehashmap_mode=hash_mode::AUTO; +///Lowerlimitofhashmapbitlength.Morethan8. +size_thashmap_min_bitlen=0; +///Upperlimitofhashmapfillrate.Morethan0.1,lessthan0.9. +floathashmap_max_fill_rate=0.5; + +///Numberofiterationsofinitialrandomseednodeselection.1ormore. + +uint32_tnum_random_samplings=1; +///Bitmaskusedforinitialrandomseednodeselection. +uint64_tseed=0x128394; +}; + +structGpuIndexCagra:publicGpuIndex{ +public: +GpuIndexCagra( +GpuResourcesProvider*provider, +intdims, +faiss::MetricTypemetric=faiss::METRIC_L2, +GpuIndexCagraConfigconfig=GpuIndexCagraConfig()); + +///TrainsCAGRAbasedonthegivenvectordata +voidtrain(idx_tn,constfloat*x)override; + +///InitializeourselvesfromthegivenCPUindex;willoverwrite +///alldatainourselves +voidcopyFrom(constfaiss::IndexHNSWCagra*index); + +///CopyourselvestothegivenCPUindex;willoverwritealldata +///intheindexinstance +voidcopyTo(faiss::IndexHNSWCagra*index)const; + +voidreset()override; + +std::vector<idx_t>get_knngraph()const; + +protected: +booladdImplRequiresIDs_()constoverride; + +voidaddImpl_(idx_tn,constfloat*x,constidx_t*ids)override; + +///CalledfromGpuIndexforsearch +voidsearchImpl_( +idx_tn, +constfloat*x, +intk, +float*distances, +idx_t*labels, +constSearchParameters*search_params)constoverride; + +///Ourconfigurationoptions +constGpuIndexCagraConfigcagraConfig_; + +///Instancethatweown;containstheinvertedlists +std::shared_ptr<RaftCagra>index_; +}; + +}//namespacegpu +}//namespacefaiss diff --git a/xml/GpuIndex_8h.xml b/xml/GpuIndex_8h.xml index 097c96c9ea..0a1e68cd76 100644 --- a/xml/GpuIndex_8h.xml +++ b/xml/GpuIndex_8h.xml @@ -192,203 +192,204 @@ -/** -*Copyright(c)Facebook,Inc.anditsaffiliates. -* -*ThissourcecodeislicensedundertheMITlicensefoundinthe -*LICENSEfileintherootdirectoryofthissourcetree. -*/ -/* -*Copyright(c)2023,NVIDIACORPORATION. -* -*LicensedundertheApacheLicense,Version2.0(the"License"); -*youmaynotusethisfileexceptincompliancewiththeLicense. -*YoumayobtainacopyoftheLicenseat -* -*http://www.apache.org/licenses/LICENSE-2.0 -* -*Unlessrequiredbyapplicablelaworagreedtoinwriting,software -*distributedundertheLicenseisdistributedonan"ASIS"BASIS, -*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. -*SeetheLicenseforthespecificlanguagegoverningpermissionsand -*limitationsundertheLicense. -*/ - -#pragmaonce - -#include<faiss/Index.h> -#include<faiss/gpu/GpuResources.h> - -namespacefaiss{ -namespacegpu{ - -structGpuIndexConfig{ -///GPUdeviceonwhichtheindexisresident -intdevice=0; - -///Whatmemoryspacetouseforprimarystorage. -///OnPascalandabove(CC6+)architectures,allowsGPUstouse -///morememorythanisavailableontheGPU. -MemorySpacememorySpace=MemorySpace::Device; - -///ShouldtheindexdispatchdowntoRAFT? -#ifdefinedUSE_NVIDIA_RAFT -booluse_raft=true; -#else -booluse_raft=false; -#endif -}; - -///AcentralizedfunctionthatdetermineswhetherRAFTshould -///beusedbasedonvariousconditions(suchasunsupportedarchitecture) -boolshould_use_raft(GpuIndexConfigconfig_); - -classGpuIndex:publicfaiss::Index{ -public: -GpuIndex( -std::shared_ptr<GpuResources>resources, -intdims, -faiss::MetricTypemetric, -floatmetricArg, -GpuIndexConfigconfig); - -///Returnsthedevicethatthisindexisresidenton -intgetDevice()const; - -///ReturnsareferencetoourGpuResourcesobjectthatmanagesmemory, -///streamandhandleresourcesontheGPU -std::shared_ptr<GpuResources>getResources(); - -///Settheminimumdatasizeforsearches(inMiB)forwhichweuse -///CPU->GPUpaging -voidsetMinPagingSize(size_tsize); - -///Returnsthecurrentminimumdatasizeforpagedsearches -size_tgetMinPagingSize()const; - -///`x`canberesidentontheCPUoranyGPU;copiesareperformed -///asneeded -///Handlespagedaddsiftheaddsetistoolarge;callsaddInternal_ -voidadd(idx_t,constfloat*x)override; - -///`x`and`ids`canberesidentontheCPUoranyGPU;copiesare -///performedasneeded -///Handlespagedaddsiftheaddsetistoolarge;callsaddInternal_ -voidadd_with_ids(idx_tn,constfloat*x,constidx_t*ids)override; - -///`x`and`labels`canberesidentontheCPUoranyGPU;copiesare -///performedasneeded -voidassign(idx_tn,constfloat*x,idx_t*labels,idx_tk=1) -constoverride; - -///`x`,`distances`and`labels`canberesidentontheCPUorany -///GPU;copiesareperformedasneeded -voidsearch( -idx_tn, -constfloat*x, -idx_tk, -float*distances, -idx_t*labels, -constSearchParameters*params=nullptr)constoverride; - -///`x`,`distances`and`labels`and`recons`canberesidentontheCPUor -///anyGPU;copiesareperformedasneeded -voidsearch_and_reconstruct( -idx_tn, -constfloat*x, -idx_tk, -float*distances, -idx_t*labels, -float*recons, -constSearchParameters*params=nullptr)constoverride; - -///OverriddentoforceGPUindicestoprovidetheirownGPU-friendly -///implementation -voidcompute_residual(constfloat*x,float*residual,idx_tkey) -constoverride; - -///OverriddentoforceGPUindicestoprovidetheirownGPU-friendly -///implementation -voidcompute_residual_n( -idx_tn, -constfloat*xs, -float*residuals, -constidx_t*keys)constoverride; - -protected: -///CopywhatweneedfromtheCPUequivalent -voidcopyFrom(constfaiss::Index*index); - -///CopywhatwehavetotheCPUequivalent -voidcopyTo(faiss::Index*index)const; - -///DoesaddImpl_requireIDs?Ifso,andnoIDsareprovided,wewill -///generatethemsequentiallybasedontheorderinwhichtheIDsareadded -virtualbooladdImplRequiresIDs_()const=0; - -///Overriddentoactuallyperformtheadd -///Alldataisguaranteedtoberesidentonourdevice -virtualvoidaddImpl_(idx_tn,constfloat*x,constidx_t*ids)=0; - -///Overriddentoactuallyperformthesearch -///Alldataisguaranteedtoberesidentonourdevice -virtualvoidsearchImpl_( -idx_tn, -constfloat*x, -intk, -float*distances, -idx_t*labels, -constSearchParameters*params)const=0; - -private: -///Handlespagedaddsiftheaddsetistoolarge,passesto -///addImpl_toactuallyperformtheaddforthecurrentpage -voidaddPaged_(idx_tn,constfloat*x,constidx_t*ids); - -///CallsaddImpl_forasinglepageofGPU-residentdata -voidaddPage_(idx_tn,constfloat*x,constidx_t*ids); - -///CallssearchImpl_forasinglepageofGPU-residentdata -voidsearchNonPaged_( -idx_tn, -constfloat*x, -intk, -float*outDistancesData, -idx_t*outIndicesData, -constSearchParameters*params)const; - -///CallssearchImpl_forasinglepageofGPU-residentdata, -///handlingpagingofthedataandcopiesfromtheCPU -voidsearchFromCpuPaged_( -idx_tn, -constfloat*x, -intk, -float*outDistancesData, -idx_t*outIndicesData, -constSearchParameters*params)const; - -protected: -///Managesstreams,cuBLAShandlesandscratchmemoryfordevices -std::shared_ptr<GpuResources>resources_; - -///Ourconfigurationoptions -constGpuIndexConfigconfig_; - -///SizeabovewhichwepagecopiesfromtheCPUtoGPU -size_tminPagedSize_; -}; - -///IfthegivenindexisaGPUindex,thisreturnstheindexinstance -GpuIndex*tryCastGpuIndex(faiss::Index*index); - -///IsthegivenindexinstanceaGPUindex? -boolisGpuIndex(faiss::Index*index); - -///DoesthegivenCPUindexinstancehaveacorrespondingGPUimplementation? -boolisGpuIndexImplemented(faiss::Index*index); - -}//namespacegpu -}//namespacefaiss +//@lint-ignore-everyLICENSELINT +/** +*Copyright(c)Facebook,Inc.anditsaffiliates. +* +*ThissourcecodeislicensedundertheMITlicensefoundinthe +*LICENSEfileintherootdirectoryofthissourcetree. +*/ +/* +*Copyright(c)2023,NVIDIACORPORATION. +* +*LicensedundertheApacheLicense,Version2.0(the"License"); +*youmaynotusethisfileexceptincompliancewiththeLicense. +*YoumayobtainacopyoftheLicenseat +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +*Unlessrequiredbyapplicablelaworagreedtoinwriting,software +*distributedundertheLicenseisdistributedonan"ASIS"BASIS, +*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. +*SeetheLicenseforthespecificlanguagegoverningpermissionsand +*limitationsundertheLicense. +*/ + +#pragmaonce + +#include<faiss/Index.h> +#include<faiss/gpu/GpuResources.h> + +namespacefaiss{ +namespacegpu{ + +structGpuIndexConfig{ +///GPUdeviceonwhichtheindexisresident +intdevice=0; + +///Whatmemoryspacetouseforprimarystorage. +///OnPascalandabove(CC6+)architectures,allowsGPUstouse +///morememorythanisavailableontheGPU. +MemorySpacememorySpace=MemorySpace::Device; + +///ShouldtheindexdispatchdowntoRAFT? +#ifdefinedUSE_NVIDIA_RAFT +booluse_raft=true; +#else +booluse_raft=false; +#endif +}; + +///AcentralizedfunctionthatdetermineswhetherRAFTshould +///beusedbasedonvariousconditions(suchasunsupportedarchitecture) +boolshould_use_raft(GpuIndexConfigconfig_); + +classGpuIndex:publicfaiss::Index{ +public: +GpuIndex( +std::shared_ptr<GpuResources>resources, +intdims, +faiss::MetricTypemetric, +floatmetricArg, +GpuIndexConfigconfig); + +///Returnsthedevicethatthisindexisresidenton +intgetDevice()const; + +///ReturnsareferencetoourGpuResourcesobjectthatmanagesmemory, +///streamandhandleresourcesontheGPU +std::shared_ptr<GpuResources>getResources(); + +///Settheminimumdatasizeforsearches(inMiB)forwhichweuse +///CPU->GPUpaging +voidsetMinPagingSize(size_tsize); + +///Returnsthecurrentminimumdatasizeforpagedsearches +size_tgetMinPagingSize()const; + +///`x`canberesidentontheCPUoranyGPU;copiesareperformed +///asneeded +///Handlespagedaddsiftheaddsetistoolarge;callsaddInternal_ +voidadd(idx_t,constfloat*x)override; + +///`x`and`ids`canberesidentontheCPUoranyGPU;copiesare +///performedasneeded +///Handlespagedaddsiftheaddsetistoolarge;callsaddInternal_ +voidadd_with_ids(idx_tn,constfloat*x,constidx_t*ids)override; + +///`x`and`labels`canberesidentontheCPUoranyGPU;copiesare +///performedasneeded +voidassign(idx_tn,constfloat*x,idx_t*labels,idx_tk=1) +constoverride; + +///`x`,`distances`and`labels`canberesidentontheCPUorany +///GPU;copiesareperformedasneeded +voidsearch( +idx_tn, +constfloat*x, +idx_tk, +float*distances, +idx_t*labels, +constSearchParameters*params=nullptr)constoverride; + +///`x`,`distances`and`labels`and`recons`canberesidentontheCPUor +///anyGPU;copiesareperformedasneeded +voidsearch_and_reconstruct( +idx_tn, +constfloat*x, +idx_tk, +float*distances, +idx_t*labels, +float*recons, +constSearchParameters*params=nullptr)constoverride; + +///OverriddentoforceGPUindicestoprovidetheirownGPU-friendly +///implementation +voidcompute_residual(constfloat*x,float*residual,idx_tkey) +constoverride; + +///OverriddentoforceGPUindicestoprovidetheirownGPU-friendly +///implementation +voidcompute_residual_n( +idx_tn, +constfloat*xs, +float*residuals, +constidx_t*keys)constoverride; + +protected: +///CopywhatweneedfromtheCPUequivalent +voidcopyFrom(constfaiss::Index*index); + +///CopywhatwehavetotheCPUequivalent +voidcopyTo(faiss::Index*index)const; + +///DoesaddImpl_requireIDs?Ifso,andnoIDsareprovided,wewill +///generatethemsequentiallybasedontheorderinwhichtheIDsareadded +virtualbooladdImplRequiresIDs_()const=0; + +///Overriddentoactuallyperformtheadd +///Alldataisguaranteedtoberesidentonourdevice +virtualvoidaddImpl_(idx_tn,constfloat*x,constidx_t*ids)=0; + +///Overriddentoactuallyperformthesearch +///Alldataisguaranteedtoberesidentonourdevice +virtualvoidsearchImpl_( +idx_tn, +constfloat*x, +intk, +float*distances, +idx_t*labels, +constSearchParameters*params)const=0; + +private: +///Handlespagedaddsiftheaddsetistoolarge,passesto +///addImpl_toactuallyperformtheaddforthecurrentpage +voidaddPaged_(idx_tn,constfloat*x,constidx_t*ids); + +///CallsaddImpl_forasinglepageofGPU-residentdata +voidaddPage_(idx_tn,constfloat*x,constidx_t*ids); + +///CallssearchImpl_forasinglepageofGPU-residentdata +voidsearchNonPaged_( +idx_tn, +constfloat*x, +intk, +float*outDistancesData, +idx_t*outIndicesData, +constSearchParameters*params)const; + +///CallssearchImpl_forasinglepageofGPU-residentdata, +///handlingpagingofthedataandcopiesfromtheCPU +voidsearchFromCpuPaged_( +idx_tn, +constfloat*x, +intk, +float*outDistancesData, +idx_t*outIndicesData, +constSearchParameters*params)const; + +protected: +///Managesstreams,cuBLAShandlesandscratchmemoryfordevices +std::shared_ptr<GpuResources>resources_; + +///Ourconfigurationoptions +constGpuIndexConfigconfig_; + +///SizeabovewhichwepagecopiesfromtheCPUtoGPU +size_tminPagedSize_; +}; + +///IfthegivenindexisaGPUindex,thisreturnstheindexinstance +GpuIndex*tryCastGpuIndex(faiss::Index*index); + +///IsthegivenindexinstanceaGPUindex? +boolisGpuIndex(faiss::Index*index); + +///DoesthegivenCPUindexinstancehaveacorrespondingGPUimplementation? +boolisGpuIndexImplemented(faiss::Index*index); + +}//namespacegpu +}//namespacefaiss diff --git a/xml/GpuResources_8h.xml b/xml/GpuResources_8h.xml index 38375c6099..25ea22cc39 100644 --- a/xml/GpuResources_8h.xml +++ b/xml/GpuResources_8h.xml @@ -200,311 +200,312 @@ -/** -*Copyright(c)Facebook,Inc.anditsaffiliates. -* -*ThissourcecodeislicensedundertheMITlicensefoundinthe -*LICENSEfileintherootdirectoryofthissourcetree. -*/ -/* -*Copyright(c)2023,NVIDIACORPORATION. -* -*LicensedundertheApacheLicense,Version2.0(the"License"); -*youmaynotusethisfileexceptincompliancewiththeLicense. -*YoumayobtainacopyoftheLicenseat -* -*http://www.apache.org/licenses/LICENSE-2.0 -* -*Unlessrequiredbyapplicablelaworagreedtoinwriting,software -*distributedundertheLicenseisdistributedonan"ASIS"BASIS, -*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. -*SeetheLicenseforthespecificlanguagegoverningpermissionsand -*limitationsundertheLicense. -*/ - -#pragmaonce - -#include<cublas_v2.h> -#include<cuda_runtime.h> -#include<faiss/impl/FaissAssert.h> - -#include<memory> -#include<utility> -#include<vector> - -#ifdefinedUSE_NVIDIA_RAFT -#include<raft/core/device_resources.hpp> -#include<rmm/mr/device/device_memory_resource.hpp> -#endif - -namespacefaiss{ -namespacegpu{ - -classGpuResources; - -enumAllocType{ -///Unknownallocationtypeormiscellaneous(notcurrentlycategorized) -Other=0, - -///PrimarydatastorageforGpuIndexFlat(therawmatrixofvectorsand -///vectornormsifneeded) -FlatData=1, - -///PrimarydatastorageforGpuIndexIVF*(thestorageforeachindividual -///IVFlist) -IVFLists=2, - -///Quantizer(PQ,SQ)dictionaryinformation -Quantizer=3, - -///ForGpuIndexIVFPQ,"precomputedcodes"formoreefficientPQlookup -///requiretheuseofpossiblylargetables.Thesearemarkedseparately -///from -///Quantizerasthesecanfrequentlybe100s-1000sofMiBinsize -QuantizerPrecomputedCodes=4, - -/// -///StandardGpuResourcesimplementationspecifictypes -/// - -///WhenusingStandardGpuResources,temporarymemoryallocations -///(MemorySpace::Temporary)comeoutofastackregionofmemorythatis -///allocatedupfrontforeachgpu(e.g.,1.5GiBuponinitialization). -///This -///allocationbyStandardGpuResourcesismarkedwiththisAllocType. -TemporaryMemoryBuffer=10, - -///WhenusingStandardGpuResources,anyMemorySpace::Temporaryallocations -///thatcannotbesatisfiedwithintheTemporaryMemoryBufferregionfall -///back -///tocallingcudaMallocwhicharesizedtojusttherequestathand.These -///"overflow"temporaryallocationsaremarkedwiththisAllocType. -TemporaryMemoryOverflow=11, -}; - -///ConvertanAllocTypetostring -std::stringallocTypeToString(AllocTypet); - -///MemoryregionsaccessibletotheGPU -enumMemorySpace{ -///Temporarydevicememory(guaranteedtonolongerbeuseduponexitofa -///top-levelindexcall,andwherethestreamsusingithavecompletedGPU -///work).TypicallybackedbyDevicememory(cudaMalloc/cudaFree). -Temporary=0, - -///ManagedusingcudaMalloc/cudaFree(typicalGPUdevicememory) -Device=1, - -///ManagedusingcudaMallocManaged/cudaFree(typicalUnifiedCPU/GPU -///memory) -Unified=2, -}; - -///ConvertaMemorySpacetostring -std::stringmemorySpaceToString(MemorySpaces); - -///Informationonwhat/whereanallocationis -structAllocInfo{ -inlineAllocInfo(){} - -inlineAllocInfo(AllocTypeat,intdev,MemorySpacesp,cudaStream_tst) -:type(at),device(dev),space(sp),stream(st){} - -///Returnsastringrepresentationofthisinfo -std::stringtoString()const; - -///Theinternalcategoryoftheallocation -AllocTypetype=AllocType::Other; - -///Thedeviceonwhichtheallocationishappening -intdevice=0; - -///Thememoryspaceoftheallocation -MemorySpacespace=MemorySpace::Device; - -///Thestreamonwhichnewworkonthememorywillbeordered(e.g.,ifa -///pieceofmemorycachedandtobereturnedforthiscallwaslastusedon -///stream3andanewmemoryrequestisforstream4,thememorymanager -///willsynchronizestream4towaitforthecompletionofstream3via -///eventsorotherstreamsynchronization. -/// -///Thememorymanagerguaranteesthatthereturnedmemoryisfreetouse -///withoutdataracesonthisstreamspecified. -cudaStream_tstream=nullptr; -}; - -///CreateanAllocInfoforthecurrentdevicewithMemorySpace::Device -AllocInfomakeDevAlloc(AllocTypeat,cudaStream_tst); - -///CreateanAllocInfoforthecurrentdevicewithMemorySpace::Temporary -AllocInfomakeTempAlloc(AllocTypeat,cudaStream_tst); - -///CreateanAllocInfoforthecurrentdevice -AllocInfomakeSpaceAlloc(AllocTypeat,MemorySpacesp,cudaStream_tst); - -///Informationonwhat/whereanallocationis,alongwithhowbigitshouldbe -structAllocRequest:publicAllocInfo{ -inlineAllocRequest(){} - -inlineAllocRequest(constAllocInfo&info,size_tsz) -:AllocInfo(info),size(sz){} - -inlineAllocRequest( -AllocTypeat, -intdev, -MemorySpacesp, -cudaStream_tst, -size_tsz) -:AllocInfo(at,dev,sp,st),size(sz){} - -///Returnsastringrepresentationofthisrequest -std::stringtoString()const; - -///Thesizeinbytesoftheallocation -size_tsize=0; - -#ifdefinedUSE_NVIDIA_RAFT -rmm::mr::device_memory_resource*mr=nullptr; -#endif -}; - -///ARAIIobjectthatmanagesatemporarymemoryrequest -structGpuMemoryReservation{ -GpuMemoryReservation(); -GpuMemoryReservation( -GpuResources*r, -intdev, -cudaStream_tstr, -void*p, -size_tsz); -GpuMemoryReservation(GpuMemoryReservation&&m)noexcept; -~GpuMemoryReservation(); - -GpuMemoryReservation&operator=(GpuMemoryReservation&&m); - -inlinevoid*get(){ -returndata; -} - -voidrelease(); - -GpuResources*res; -intdevice; -cudaStream_tstream; -void*data; -size_tsize; -}; - -///BaseclassofGPU-sideresourceprovider;hidesprovisionof -///cuBLAShandles,CUDAstreamsandalldevicememoryallocationperformed -classGpuResources{ -public: -virtual~GpuResources(); - -///Calltopre-allocateresourcesforaparticulardevice.Ifthisis -///notcalled,thenresourceswillbeallocatedatthefirsttime -///ofdemand -virtualvoidinitializeForDevice(intdevice)=0; - -///ReturnsthecuBLAShandlethatweuseforthegivendevice -virtualcublasHandle_tgetBlasHandle(intdevice)=0; - -///Returnsthestreamthatweorderallcomputationonforthe -///givendevice -virtualcudaStream_tgetDefaultStream(intdevice)=0; - -#ifdefinedUSE_NVIDIA_RAFT -///Returnstherafthandleforthegivendevicewhichcanbeusedto -///makecallstootherraftprimitives. -virtualraft::device_resources&getRaftHandle(intdevice)=0; -raft::device_resources&getRaftHandleCurrentDevice(); -#endif - -///Overridesthedefaultstreamforadevicetotheuser-suppliedstream. -///Theresourcesobjectdoesnotownthisstream(i.e.,itwillnotdestroy -///it). -virtualvoidsetDefaultStream(intdevice,cudaStream_tstream)=0; - -///Returnsthesetofalternativestreamsthatweuseforthegivendevice -virtualstd::vector<cudaStream_t>getAlternateStreams(intdevice)=0; - -///Memorymanagement -///Returnsanallocationfromthegivenmemoryspace,orderedwithrespect -///tothegivenstream(i.e.,thefirstuserwillbeakernelinthis -///stream).Allallocationsaresizedinternallytobethenexthighest -///multipleof16bytes,andallallocationsreturnedareguaranteedtobe -///16bytealigned. -virtualvoid*allocMemory(constAllocRequest&req)=0; - -///Returnsapreviousallocation -virtualvoiddeallocMemory(intdevice,void*in)=0; - -///ForMemorySpace::Temporary,howmuchspaceisimmediatelyavailable -///withoutcudaMallocallocation? -virtualsize_tgetTempMemoryAvailable(intdevice)const=0; - -///ReturnstheavailableCPUpinnedmemorybuffer -virtualstd::pair<void*,size_t>getPinnedMemory()=0; - -///ReturnsthestreamonwhichweperformasyncCPU<->GPUcopies -virtualcudaStream_tgetAsyncCopyStream(intdevice)=0; - -/// -///Functionsprovidedbydefault -/// - -///CallsgetBlasHandlewiththecurrentdevice -cublasHandle_tgetBlasHandleCurrentDevice(); - -///CallsgetDefaultStreamwiththecurrentdevice -cudaStream_tgetDefaultStreamCurrentDevice(); - -///CallsgetTempMemoryAvailablewiththecurrentdevice -size_tgetTempMemoryAvailableCurrentDevice()const; - -///ReturnsatemporarymemoryallocationviaaRAIIobject -GpuMemoryReservationallocMemoryHandle(constAllocRequest&req); - -///SynchronizestheCPUwithrespecttothedefaultstreamforthe -///givendevice -//equivalenttocudaDeviceSynchronize(getDefaultStream(device)) -voidsyncDefaultStream(intdevice); - -///CallssyncDefaultStreamforthecurrentdevice -voidsyncDefaultStreamCurrentDevice(); - -///CallsgetAlternateStreamsforthecurrentdevice -std::vector<cudaStream_t>getAlternateStreamsCurrentDevice(); - -///CallsgetAsyncCopyStreamforthecurrentdevice -cudaStream_tgetAsyncCopyStreamCurrentDevice(); -}; - -///Interfaceforaproviderofasharedresourcesobject.Thisistoavoid -///interfacingstd::shared_ptrtoPython -classGpuResourcesProvider{ -public: -virtual~GpuResourcesProvider(); - -///Returnsthesharedresourcesobject -virtualstd::shared_ptr<GpuResources>getResources()=0; -}; - -///AsimplewrapperforaGpuResourcesobjecttomakeaGpuResourcesProvider -///outofitagain -classGpuResourcesProviderFromInstance:publicGpuResourcesProvider{ -public: -explicitGpuResourcesProviderFromInstance(std::shared_ptr<GpuResources>p); -~GpuResourcesProviderFromInstance()override; - -std::shared_ptr<GpuResources>getResources()override; - -private: -std::shared_ptr<GpuResources>res_; -}; - -}//namespacegpu -}//namespacefaiss +//@lint-ignore-everyLICENSELINT +/** +*Copyright(c)Facebook,Inc.anditsaffiliates. +* +*ThissourcecodeislicensedundertheMITlicensefoundinthe +*LICENSEfileintherootdirectoryofthissourcetree. +*/ +/* +*Copyright(c)2023,NVIDIACORPORATION. +* +*LicensedundertheApacheLicense,Version2.0(the"License"); +*youmaynotusethisfileexceptincompliancewiththeLicense. +*YoumayobtainacopyoftheLicenseat +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +*Unlessrequiredbyapplicablelaworagreedtoinwriting,software +*distributedundertheLicenseisdistributedonan"ASIS"BASIS, +*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. +*SeetheLicenseforthespecificlanguagegoverningpermissionsand +*limitationsundertheLicense. +*/ + +#pragmaonce + +#include<cublas_v2.h> +#include<cuda_runtime.h> +#include<faiss/impl/FaissAssert.h> + +#include<memory> +#include<utility> +#include<vector> + +#ifdefinedUSE_NVIDIA_RAFT +#include<raft/core/device_resources.hpp> +#include<rmm/mr/device/device_memory_resource.hpp> +#endif + +namespacefaiss{ +namespacegpu{ + +classGpuResources; + +enumAllocType{ +///Unknownallocationtypeormiscellaneous(notcurrentlycategorized) +Other=0, + +///PrimarydatastorageforGpuIndexFlat(therawmatrixofvectorsand +///vectornormsifneeded) +FlatData=1, + +///PrimarydatastorageforGpuIndexIVF*(thestorageforeachindividual +///IVFlist) +IVFLists=2, + +///Quantizer(PQ,SQ)dictionaryinformation +Quantizer=3, + +///ForGpuIndexIVFPQ,"precomputedcodes"formoreefficientPQlookup +///requiretheuseofpossiblylargetables.Thesearemarkedseparately +///from +///Quantizerasthesecanfrequentlybe100s-1000sofMiBinsize +QuantizerPrecomputedCodes=4, + +/// +///StandardGpuResourcesimplementationspecifictypes +/// + +///WhenusingStandardGpuResources,temporarymemoryallocations +///(MemorySpace::Temporary)comeoutofastackregionofmemorythatis +///allocatedupfrontforeachgpu(e.g.,1.5GiBuponinitialization). +///This +///allocationbyStandardGpuResourcesismarkedwiththisAllocType. +TemporaryMemoryBuffer=10, + +///WhenusingStandardGpuResources,anyMemorySpace::Temporaryallocations +///thatcannotbesatisfiedwithintheTemporaryMemoryBufferregionfall +///back +///tocallingcudaMallocwhicharesizedtojusttherequestathand.These +///"overflow"temporaryallocationsaremarkedwiththisAllocType. +TemporaryMemoryOverflow=11, +}; + +///ConvertanAllocTypetostring +std::stringallocTypeToString(AllocTypet); + +///MemoryregionsaccessibletotheGPU +enumMemorySpace{ +///Temporarydevicememory(guaranteedtonolongerbeuseduponexitofa +///top-levelindexcall,andwherethestreamsusingithavecompletedGPU +///work).TypicallybackedbyDevicememory(cudaMalloc/cudaFree). +Temporary=0, + +///ManagedusingcudaMalloc/cudaFree(typicalGPUdevicememory) +Device=1, + +///ManagedusingcudaMallocManaged/cudaFree(typicalUnifiedCPU/GPU +///memory) +Unified=2, +}; + +///ConvertaMemorySpacetostring +std::stringmemorySpaceToString(MemorySpaces); + +///Informationonwhat/whereanallocationis +structAllocInfo{ +inlineAllocInfo(){} + +inlineAllocInfo(AllocTypeat,intdev,MemorySpacesp,cudaStream_tst) +:type(at),device(dev),space(sp),stream(st){} + +///Returnsastringrepresentationofthisinfo +std::stringtoString()const; + +///Theinternalcategoryoftheallocation +AllocTypetype=AllocType::Other; + +///Thedeviceonwhichtheallocationishappening +intdevice=0; + +///Thememoryspaceoftheallocation +MemorySpacespace=MemorySpace::Device; + +///Thestreamonwhichnewworkonthememorywillbeordered(e.g.,ifa +///pieceofmemorycachedandtobereturnedforthiscallwaslastusedon +///stream3andanewmemoryrequestisforstream4,thememorymanager +///willsynchronizestream4towaitforthecompletionofstream3via +///eventsorotherstreamsynchronization. +/// +///Thememorymanagerguaranteesthatthereturnedmemoryisfreetouse +///withoutdataracesonthisstreamspecified. +cudaStream_tstream=nullptr; +}; + +///CreateanAllocInfoforthecurrentdevicewithMemorySpace::Device +AllocInfomakeDevAlloc(AllocTypeat,cudaStream_tst); + +///CreateanAllocInfoforthecurrentdevicewithMemorySpace::Temporary +AllocInfomakeTempAlloc(AllocTypeat,cudaStream_tst); + +///CreateanAllocInfoforthecurrentdevice +AllocInfomakeSpaceAlloc(AllocTypeat,MemorySpacesp,cudaStream_tst); + +///Informationonwhat/whereanallocationis,alongwithhowbigitshouldbe +structAllocRequest:publicAllocInfo{ +inlineAllocRequest(){} + +inlineAllocRequest(constAllocInfo&info,size_tsz) +:AllocInfo(info),size(sz){} + +inlineAllocRequest( +AllocTypeat, +intdev, +MemorySpacesp, +cudaStream_tst, +size_tsz) +:AllocInfo(at,dev,sp,st),size(sz){} + +///Returnsastringrepresentationofthisrequest +std::stringtoString()const; + +///Thesizeinbytesoftheallocation +size_tsize=0; + +#ifdefinedUSE_NVIDIA_RAFT +rmm::mr::device_memory_resource*mr=nullptr; +#endif +}; + +///ARAIIobjectthatmanagesatemporarymemoryrequest +structGpuMemoryReservation{ +GpuMemoryReservation(); +GpuMemoryReservation( +GpuResources*r, +intdev, +cudaStream_tstr, +void*p, +size_tsz); +GpuMemoryReservation(GpuMemoryReservation&&m)noexcept; +~GpuMemoryReservation(); + +GpuMemoryReservation&operator=(GpuMemoryReservation&&m); + +inlinevoid*get(){ +returndata; +} + +voidrelease(); + +GpuResources*res; +intdevice; +cudaStream_tstream; +void*data; +size_tsize; +}; + +///BaseclassofGPU-sideresourceprovider;hidesprovisionof +///cuBLAShandles,CUDAstreamsandalldevicememoryallocationperformed +classGpuResources{ +public: +virtual~GpuResources(); + +///Calltopre-allocateresourcesforaparticulardevice.Ifthisis +///notcalled,thenresourceswillbeallocatedatthefirsttime +///ofdemand +virtualvoidinitializeForDevice(intdevice)=0; + +///ReturnsthecuBLAShandlethatweuseforthegivendevice +virtualcublasHandle_tgetBlasHandle(intdevice)=0; + +///Returnsthestreamthatweorderallcomputationonforthe +///givendevice +virtualcudaStream_tgetDefaultStream(intdevice)=0; + +#ifdefinedUSE_NVIDIA_RAFT +///Returnstherafthandleforthegivendevicewhichcanbeusedto +///makecallstootherraftprimitives. +virtualraft::device_resources&getRaftHandle(intdevice)=0; +raft::device_resources&getRaftHandleCurrentDevice(); +#endif + +///Overridesthedefaultstreamforadevicetotheuser-suppliedstream. +///Theresourcesobjectdoesnotownthisstream(i.e.,itwillnotdestroy +///it). +virtualvoidsetDefaultStream(intdevice,cudaStream_tstream)=0; + +///Returnsthesetofalternativestreamsthatweuseforthegivendevice +virtualstd::vector<cudaStream_t>getAlternateStreams(intdevice)=0; + +///Memorymanagement +///Returnsanallocationfromthegivenmemoryspace,orderedwithrespect +///tothegivenstream(i.e.,thefirstuserwillbeakernelinthis +///stream).Allallocationsaresizedinternallytobethenexthighest +///multipleof16bytes,andallallocationsreturnedareguaranteedtobe +///16bytealigned. +virtualvoid*allocMemory(constAllocRequest&req)=0; + +///Returnsapreviousallocation +virtualvoiddeallocMemory(intdevice,void*in)=0; + +///ForMemorySpace::Temporary,howmuchspaceisimmediatelyavailable +///withoutcudaMallocallocation? +virtualsize_tgetTempMemoryAvailable(intdevice)const=0; + +///ReturnstheavailableCPUpinnedmemorybuffer +virtualstd::pair<void*,size_t>getPinnedMemory()=0; + +///ReturnsthestreamonwhichweperformasyncCPU<->GPUcopies +virtualcudaStream_tgetAsyncCopyStream(intdevice)=0; + +/// +///Functionsprovidedbydefault +/// + +///CallsgetBlasHandlewiththecurrentdevice +cublasHandle_tgetBlasHandleCurrentDevice(); + +///CallsgetDefaultStreamwiththecurrentdevice +cudaStream_tgetDefaultStreamCurrentDevice(); + +///CallsgetTempMemoryAvailablewiththecurrentdevice +size_tgetTempMemoryAvailableCurrentDevice()const; + +///ReturnsatemporarymemoryallocationviaaRAIIobject +GpuMemoryReservationallocMemoryHandle(constAllocRequest&req); + +///SynchronizestheCPUwithrespecttothedefaultstreamforthe +///givendevice +//equivalenttocudaDeviceSynchronize(getDefaultStream(device)) +voidsyncDefaultStream(intdevice); + +///CallssyncDefaultStreamforthecurrentdevice +voidsyncDefaultStreamCurrentDevice(); + +///CallsgetAlternateStreamsforthecurrentdevice +std::vector<cudaStream_t>getAlternateStreamsCurrentDevice(); + +///CallsgetAsyncCopyStreamforthecurrentdevice +cudaStream_tgetAsyncCopyStreamCurrentDevice(); +}; + +///Interfaceforaproviderofasharedresourcesobject.Thisistoavoid +///interfacingstd::shared_ptrtoPython +classGpuResourcesProvider{ +public: +virtual~GpuResourcesProvider(); + +///Returnsthesharedresourcesobject +virtualstd::shared_ptr<GpuResources>getResources()=0; +}; + +///AsimplewrapperforaGpuResourcesobjecttomakeaGpuResourcesProvider +///outofitagain +classGpuResourcesProviderFromInstance:publicGpuResourcesProvider{ +public: +explicitGpuResourcesProviderFromInstance(std::shared_ptr<GpuResources>p); +~GpuResourcesProviderFromInstance()override; + +std::shared_ptr<GpuResources>getResources()override; + +private: +std::shared_ptr<GpuResources>res_; +}; + +}//namespacegpu +}//namespacefaiss diff --git a/xml/RaftCagra_8cuh.xml b/xml/RaftCagra_8cuh.xml index 41cfd8833a..c6ad3cefaf 100644 --- a/xml/RaftCagra_8cuh.xml +++ b/xml/RaftCagra_8cuh.xml @@ -7,6 +7,7 @@ +//@lint-ignore-everyLICENSELINT /** *Copyright(c)Facebook,Inc.anditsaffiliates. * diff --git a/xml/RaftFlatIndex_8cuh.xml b/xml/RaftFlatIndex_8cuh.xml index 2588abf339..d81dea82ec 100644 --- a/xml/RaftFlatIndex_8cuh.xml +++ b/xml/RaftFlatIndex_8cuh.xml @@ -7,6 +7,7 @@ +//@lint-ignore-everyLICENSELINT /** *Copyright(c)Facebook,Inc.anditsaffiliates. * diff --git a/xml/RaftIVFFlat_8cuh.xml b/xml/RaftIVFFlat_8cuh.xml index 7bb2e73456..66cc469991 100644 --- a/xml/RaftIVFFlat_8cuh.xml +++ b/xml/RaftIVFFlat_8cuh.xml @@ -7,6 +7,7 @@ +//@lint-ignore-everyLICENSELINT /** *Copyright(c)Facebook,Inc.anditsaffiliates. * diff --git a/xml/RaftIVFPQ_8cuh.xml b/xml/RaftIVFPQ_8cuh.xml index f2b0f94cab..63dbccffe4 100644 --- a/xml/RaftIVFPQ_8cuh.xml +++ b/xml/RaftIVFPQ_8cuh.xml @@ -7,6 +7,7 @@ +//@lint-ignore-everyLICENSELINT /** *Copyright(c)Facebook,Inc.anditsaffiliates. * diff --git a/xml/RaftUtils_8h.xml b/xml/RaftUtils_8h.xml index d875fff574..1f34b8f734 100644 --- a/xml/RaftUtils_8h.xml +++ b/xml/RaftUtils_8h.xml @@ -120,81 +120,82 @@ -/** -*Copyright(c)Facebook,Inc.anditsaffiliates. -* -*ThissourcecodeislicensedundertheMITlicensefoundinthe -*LICENSEfileintherootdirectoryofthissourcetree. -*/ -/* -*Copyright(c)2023,NVIDIACORPORATION. -* -*LicensedundertheApacheLicense,Version2.0(the"License"); -*youmaynotusethisfileexceptincompliancewiththeLicense. -*YoumayobtainacopyoftheLicenseat -* -*http://www.apache.org/licenses/LICENSE-2.0 -* -*Unlessrequiredbyapplicablelaworagreedtoinwriting,software -*distributedundertheLicenseisdistributedonan"ASIS"BASIS, -*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. -*SeetheLicenseforthespecificlanguagegoverningpermissionsand -*limitationsundertheLicense. -*/ - -#pragmaonce - -#include<faiss/MetricType.h> -#include<faiss/gpu/GpuResources.h> -#include<faiss/gpu/utils/Tensor.cuh> - -#include<raft/distance/distance_types.hpp> - -#pragmaGCCvisibilitypush(default) -namespacefaiss{ -namespacegpu{ - -inlineraft::distance::DistanceTypemetricFaissToRaft( -MetricTypemetric, -boolexactDistance){ -switch(metric){ -caseMetricType::METRIC_INNER_PRODUCT: -returnraft::distance::DistanceType::InnerProduct; -caseMetricType::METRIC_L2: -returnraft::distance::DistanceType::L2Expanded; -caseMetricType::METRIC_L1: -returnraft::distance::DistanceType::L1; -caseMetricType::METRIC_Linf: -returnraft::distance::DistanceType::Linf; -caseMetricType::METRIC_Lp: -returnraft::distance::DistanceType::LpUnexpanded; -caseMetricType::METRIC_Canberra: -returnraft::distance::DistanceType::Canberra; -caseMetricType::METRIC_BrayCurtis: -returnraft::distance::DistanceType::BrayCurtis; -caseMetricType::METRIC_JensenShannon: -returnraft::distance::DistanceType::JensenShannon; -default: -RAFT_FAIL("Distancetypenotsupported"); -} -} - -///IdentifymatrixrowscontainingnonNaNvalues.validRows[i]isfalseifrow -///icontainsaNaNvalueandtrueotherwise. -voidvalidRowIndices( -GpuResources*res, -Tensor<float,2,true>&vecs, -bool*validRows); - -///FilteroutmatrixrowscontainingNaNvalues.Thevectorsandindicesare -///updatedin-place. -idx_tinplaceGatherFilteredRows( -GpuResources*res, -Tensor<float,2,true>&vecs, -Tensor<idx_t,1,true>&indices); -}//namespacegpu -}//namespacefaiss -#pragmaGCCvisibilitypop +//@lint-ignore-everyLICENSELINT +/** +*Copyright(c)Facebook,Inc.anditsaffiliates. +* +*ThissourcecodeislicensedundertheMITlicensefoundinthe +*LICENSEfileintherootdirectoryofthissourcetree. +*/ +/* +*Copyright(c)2023,NVIDIACORPORATION. +* +*LicensedundertheApacheLicense,Version2.0(the"License"); +*youmaynotusethisfileexceptincompliancewiththeLicense. +*YoumayobtainacopyoftheLicenseat +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +*Unlessrequiredbyapplicablelaworagreedtoinwriting,software +*distributedundertheLicenseisdistributedonan"ASIS"BASIS, +*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. +*SeetheLicenseforthespecificlanguagegoverningpermissionsand +*limitationsundertheLicense. +*/ + +#pragmaonce + +#include<faiss/MetricType.h> +#include<faiss/gpu/GpuResources.h> +#include<faiss/gpu/utils/Tensor.cuh> + +#include<raft/distance/distance_types.hpp> + +#pragmaGCCvisibilitypush(default) +namespacefaiss{ +namespacegpu{ + +inlineraft::distance::DistanceTypemetricFaissToRaft( +MetricTypemetric, +boolexactDistance){ +switch(metric){ +caseMetricType::METRIC_INNER_PRODUCT: +returnraft::distance::DistanceType::InnerProduct; +caseMetricType::METRIC_L2: +returnraft::distance::DistanceType::L2Expanded; +caseMetricType::METRIC_L1: +returnraft::distance::DistanceType::L1; +caseMetricType::METRIC_Linf: +returnraft::distance::DistanceType::Linf; +caseMetricType::METRIC_Lp: +returnraft::distance::DistanceType::LpUnexpanded; +caseMetricType::METRIC_Canberra: +returnraft::distance::DistanceType::Canberra; +caseMetricType::METRIC_BrayCurtis: +returnraft::distance::DistanceType::BrayCurtis; +caseMetricType::METRIC_JensenShannon: +returnraft::distance::DistanceType::JensenShannon; +default: +RAFT_FAIL("Distancetypenotsupported"); +} +} + +///IdentifymatrixrowscontainingnonNaNvalues.validRows[i]isfalseifrow +///icontainsaNaNvalueandtrueotherwise. +voidvalidRowIndices( +GpuResources*res, +Tensor<float,2,true>&vecs, +bool*validRows); + +///FilteroutmatrixrowscontainingNaNvalues.Thevectorsandindicesare +///updatedin-place. +idx_tinplaceGatherFilteredRows( +GpuResources*res, +Tensor<float,2,true>&vecs, +Tensor<idx_t,1,true>&indices); +}//namespacegpu +}//namespacefaiss +#pragmaGCCvisibilitypop diff --git a/xml/StandardGpuResources_8h.xml b/xml/StandardGpuResources_8h.xml index 1983af405e..389a2faa8c 100644 --- a/xml/StandardGpuResources_8h.xml +++ b/xml/StandardGpuResources_8h.xml @@ -174,265 +174,266 @@ -/** -*Copyright(c)Facebook,Inc.anditsaffiliates. -* -*ThissourcecodeislicensedundertheMITlicensefoundinthe -*LICENSEfileintherootdirectoryofthissourcetree. -*/ -/* -*Copyright(c)2023,NVIDIACORPORATION. -* -*LicensedundertheApacheLicense,Version2.0(the"License"); -*youmaynotusethisfileexceptincompliancewiththeLicense. -*YoumayobtainacopyoftheLicenseat -* -*http://www.apache.org/licenses/LICENSE-2.0 -* -*Unlessrequiredbyapplicablelaworagreedtoinwriting,software -*distributedundertheLicenseisdistributedonan"ASIS"BASIS, -*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. -*SeetheLicenseforthespecificlanguagegoverningpermissionsand -*limitationsundertheLicense. -*/ - -#pragmaonce - -#ifdefinedUSE_NVIDIA_RAFT -#include<raft/core/device_resources.hpp> -#include<rmm/mr/host/pinned_memory_resource.hpp> -#endif - -#include<faiss/gpu/GpuResources.h> -#include<faiss/gpu/utils/DeviceUtils.h> -#include<faiss/gpu/utils/StackDeviceMemory.h> -#include<functional> -#include<map> -#include<unordered_map> -#include<vector> - -#pragmaGCCvisibilitypush(default) -namespacefaiss{ -namespacegpu{ - -///StandardimplementationoftheGpuResourcesobjectthatprovidesfora -///temporarymemorymanager -classStandardGpuResourcesImpl:publicGpuResources{ -public: -StandardGpuResourcesImpl(); - -~StandardGpuResourcesImpl()override; - -///Disableallocationoftemporarymemory;alltemporarymemory -///requestswillcallcudaMalloc/cudaFreeatthepointofuse -voidnoTempMemory(); - -///Specifythatwewishtouseacertainfixedsizeofmemoryon -///alldevicesastemporarymemory.ThisistheupperboundfortheGPU -///memorythatwewillreserve.Wewillnevergoabove1.5GiBonanyGPU; -///smallerGPUs(with<=4GiBor<=8GiB)willuselessmemorythanthat. -///Toavoidanytemporarymemoryallocation,pass0. -voidsetTempMemory(size_tsize); - -///Setamountofpinnedmemorytoallocate,forasyncGPU<->CPU -///transfers -voidsetPinnedMemory(size_tsize); - -///Calledtochangethestreamforworkordering.Wedonotown`stream`; -///i.e.,itwillnotbedestroyedwhentheGpuResourcesobjectgetscleaned -///up. -///WeareguaranteedthatallFaissGPUworkisorderedwithrespectto -///thisstreamuponexitfromanindexorotherFaissGPUcall. -voidsetDefaultStream(intdevice,cudaStream_tstream)override; - -///Revertthedefaultstreamtotheoriginalstreammanagedbythis -///resourcesobject,incasesomeonecalled`setDefaultStream`. -voidrevertDefaultStream(intdevice); - -///ReturnsthestreamforthegivendeviceonwhichallFaissGPUworkis -///ordered. -///WeareguaranteedthatallFaissGPUworkisorderedwithrespectto -///thisstreamuponexitfromanindexorotherFaissGPUcall. -cudaStream_tgetDefaultStream(intdevice)override; - -#ifdefinedUSE_NVIDIA_RAFT -///Returnstherafthandleforthegivendevicewhichcanbeusedto -///makecallstootherraftprimitives. -raft::device_resources&getRaftHandle(intdevice)override; -#endif - -///Calledtochangetheworkorderingstreamstothenullstream -///foralldevices -voidsetDefaultNullStreamAllDevices(); - -///Ifenabled,willprinteveryGPUmemoryallocationanddeallocationto -///standardoutput -voidsetLogMemoryAllocations(boolenable); - -public: -///Internalsystemcalls - -///Initializeresourcesforthisdevice -voidinitializeForDevice(intdevice)override; - -cublasHandle_tgetBlasHandle(intdevice)override; - -std::vector<cudaStream_t>getAlternateStreams(intdevice)override; - -///Allocatenon-temporaryGPUmemory -void*allocMemory(constAllocRequest&req)override; - -///Returnsapreviousallocation -voiddeallocMemory(intdevice,void*in)override; - -size_tgetTempMemoryAvailable(intdevice)constoverride; - -///ExportadescriptionofmemoryusedforPython -std::map<int,std::map<std::string,std::pair<int,size_t>>>getMemoryInfo() -const; - -std::pair<void*,size_t>getPinnedMemory()override; - -cudaStream_tgetAsyncCopyStream(intdevice)override; - -protected: -///HaveGPUresourcesbeeninitializedforthisdeviceyet? -boolisInitialized(intdevice)const; - -///AdjustthedefaulttemporarymemoryallocationbasedonthetotalGPU -///memorysize -staticsize_tgetDefaultTempMemForGPU(intdevice,size_trequested); - -protected: -///Setofcurrentlyoutstandingmemoryallocationsperdevice -///device->(allocrequest,allocatedptr) -std::unordered_map<int,std::unordered_map<void*,AllocRequest>>allocs_; - -///Temporarymemoryprovider,pereachdevice -std::unordered_map<int,std::unique_ptr<StackDeviceMemory>>tempMemory_; - -///Ourdefaultstreamthatworkisorderedon,onepereachdevice -std::unordered_map<int,cudaStream_t>defaultStreams_; - -///Thiscontainsparticularstreamsassetbytheuserfor -///ordering,ifany -std::unordered_map<int,cudaStream_t>userDefaultStreams_; - -///Otherstreamswecanuse,pereachdevice -std::unordered_map<int,std::vector<cudaStream_t>>alternateStreams_; - -///AsynccopystreamtouseforGPU<->CPUpinnedmemorycopies -std::unordered_map<int,cudaStream_t>asyncCopyStreams_; - -///cuBLAShandleforeachdevice -std::unordered_map<int,cublasHandle_t>blasHandles_; - -#ifdefinedUSE_NVIDIA_RAFT -///rafthandleforeachdevice -std::unordered_map<int,raft::device_resources>raftHandles_; - -/** -*FIXME:Integratingtheseinaseparatecodepathfornow.Ultimately, -*itwouldbeniceifweuseasimplememoryresourceabstraction -*inFAISSsowecouldpluginwhethertouseRMM'smemoryresources -*orthedefault. -* -*There'senoughduplicatedlogicthatitdoesn't*seem*tomakesense -*tocreateasubclassonlyfortheRMMmemoryresources. -*/ - -//managed_memory_resource -std::unique_ptr<rmm::mr::device_memory_resource>mmr_; - -//pinned_memory_resource -std::unique_ptr<rmm::mr::host_memory_resource>pmr_; -#endif - -///PinnedmemoryallocationforusewiththisGPU -void*pinnedMemAlloc_; -size_tpinnedMemAllocSize_; - -///Anotheroptionistouseaspecifiedamountofmemoryonall -///devices -size_ttempMemSize_; - -///Amountofpinnedmemoryweshouldallocate -size_tpinnedMemSize_; - -///WhetherornotwelogeveryGPUmemoryallocationanddeallocation -boolallocLogging_; -}; - -///DefaultimplementationofGpuResourcesthatallocatesacuBLAS -///streamand2streamsforuse,aswellastemporarymemory. -///Internally,theFaissGPUcodeusestheinstancemanagedbygetResources, -///butthisistheuser-facingobjectthatisinternallyreferencecounted. -classStandardGpuResources:publicGpuResourcesProvider{ -public: -StandardGpuResources(); -~StandardGpuResources()override; - -std::shared_ptr<GpuResources>getResources()override; - -///Disableallocationoftemporarymemory;alltemporarymemory -///requestswillcallcudaMalloc/cudaFreeatthepointofuse -voidnoTempMemory(); - -///Specifythatwewishtouseacertainfixedsizeofmemoryon -///alldevicesastemporarymemory.ThisistheupperboundfortheGPU -///memorythatwewillreserve.Wewillnevergoabove1.5GiBonanyGPU; -///smallerGPUs(with<=4GiBor<=8GiB)willuselessmemorythanthat. -///Toavoidanytemporarymemoryallocation,pass0. -voidsetTempMemory(size_tsize); - -///Setamountofpinnedmemorytoallocate,forasyncGPU<->CPU -///transfers -voidsetPinnedMemory(size_tsize); - -///Calledtochangethestreamforworkordering.Wedonotown`stream`; -///i.e.,itwillnotbedestroyedwhentheGpuResourcesobjectgetscleaned -///up. -///WeareguaranteedthatallFaissGPUworkisorderedwithrespectto -///thisstreamuponexitfromanindexorotherFaissGPUcall. -voidsetDefaultStream(intdevice,cudaStream_tstream); - -///Revertthedefaultstreamtotheoriginalstreammanagedbythis -///resourcesobject,incasesomeonecalled`setDefaultStream`. -voidrevertDefaultStream(intdevice); - -///Calledtochangetheworkorderingstreamstothenullstream -///foralldevices -voidsetDefaultNullStreamAllDevices(); - -///ExportadescriptionofmemoryusedforPython -std::map<int,std::map<std::string,std::pair<int,size_t>>>getMemoryInfo() -const; -///Returnsthecurrentdefaultstream -cudaStream_tgetDefaultStream(intdevice); - -#ifdefinedUSE_NVIDIA_RAFT -///Returnstherafthandleforthegivendevicewhichcanbeusedto -///makecallstootherraftprimitives. -raft::device_resources&getRaftHandle(intdevice); -#endif - -///Returnsthecurrentamountoftempmemoryavailable -size_tgetTempMemoryAvailable(intdevice)const; - -///SynchronizeourdefaultstreamwiththeCPU -voidsyncDefaultStreamCurrentDevice(); - -///Ifenabled,willprinteveryGPUmemoryallocationanddeallocationto -///standardoutput -voidsetLogMemoryAllocations(boolenable); - -private: -std::shared_ptr<StandardGpuResourcesImpl>res_; -}; - -}//namespacegpu -}//namespacefaiss -#pragmaGCCvisibilitypop +//@lint-ignore-everyLICENSELINT +/** +*Copyright(c)Facebook,Inc.anditsaffiliates. +* +*ThissourcecodeislicensedundertheMITlicensefoundinthe +*LICENSEfileintherootdirectoryofthissourcetree. +*/ +/* +*Copyright(c)2023,NVIDIACORPORATION. +* +*LicensedundertheApacheLicense,Version2.0(the"License"); +*youmaynotusethisfileexceptincompliancewiththeLicense. +*YoumayobtainacopyoftheLicenseat +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +*Unlessrequiredbyapplicablelaworagreedtoinwriting,software +*distributedundertheLicenseisdistributedonan"ASIS"BASIS, +*WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied. +*SeetheLicenseforthespecificlanguagegoverningpermissionsand +*limitationsundertheLicense. +*/ + +#pragmaonce + +#ifdefinedUSE_NVIDIA_RAFT +#include<raft/core/device_resources.hpp> +#include<rmm/mr/host/pinned_memory_resource.hpp> +#endif + +#include<faiss/gpu/GpuResources.h> +#include<faiss/gpu/utils/DeviceUtils.h> +#include<faiss/gpu/utils/StackDeviceMemory.h> +#include<functional> +#include<map> +#include<unordered_map> +#include<vector> + +#pragmaGCCvisibilitypush(default) +namespacefaiss{ +namespacegpu{ + +///StandardimplementationoftheGpuResourcesobjectthatprovidesfora +///temporarymemorymanager +classStandardGpuResourcesImpl:publicGpuResources{ +public: +StandardGpuResourcesImpl(); + +~StandardGpuResourcesImpl()override; + +///Disableallocationoftemporarymemory;alltemporarymemory +///requestswillcallcudaMalloc/cudaFreeatthepointofuse +voidnoTempMemory(); + +///Specifythatwewishtouseacertainfixedsizeofmemoryon +///alldevicesastemporarymemory.ThisistheupperboundfortheGPU +///memorythatwewillreserve.Wewillnevergoabove1.5GiBonanyGPU; +///smallerGPUs(with<=4GiBor<=8GiB)willuselessmemorythanthat. +///Toavoidanytemporarymemoryallocation,pass0. +voidsetTempMemory(size_tsize); + +///Setamountofpinnedmemorytoallocate,forasyncGPU<->CPU +///transfers +voidsetPinnedMemory(size_tsize); + +///Calledtochangethestreamforworkordering.Wedonotown`stream`; +///i.e.,itwillnotbedestroyedwhentheGpuResourcesobjectgetscleaned +///up. +///WeareguaranteedthatallFaissGPUworkisorderedwithrespectto +///thisstreamuponexitfromanindexorotherFaissGPUcall. +voidsetDefaultStream(intdevice,cudaStream_tstream)override; + +///Revertthedefaultstreamtotheoriginalstreammanagedbythis +///resourcesobject,incasesomeonecalled`setDefaultStream`. +voidrevertDefaultStream(intdevice); + +///ReturnsthestreamforthegivendeviceonwhichallFaissGPUworkis +///ordered. +///WeareguaranteedthatallFaissGPUworkisorderedwithrespectto +///thisstreamuponexitfromanindexorotherFaissGPUcall. +cudaStream_tgetDefaultStream(intdevice)override; + +#ifdefinedUSE_NVIDIA_RAFT +///Returnstherafthandleforthegivendevicewhichcanbeusedto +///makecallstootherraftprimitives. +raft::device_resources&getRaftHandle(intdevice)override; +#endif + +///Calledtochangetheworkorderingstreamstothenullstream +///foralldevices +voidsetDefaultNullStreamAllDevices(); + +///Ifenabled,willprinteveryGPUmemoryallocationanddeallocationto +///standardoutput +voidsetLogMemoryAllocations(boolenable); + +public: +///Internalsystemcalls + +///Initializeresourcesforthisdevice +voidinitializeForDevice(intdevice)override; + +cublasHandle_tgetBlasHandle(intdevice)override; + +std::vector<cudaStream_t>getAlternateStreams(intdevice)override; + +///Allocatenon-temporaryGPUmemory +void*allocMemory(constAllocRequest&req)override; + +///Returnsapreviousallocation +voiddeallocMemory(intdevice,void*in)override; + +size_tgetTempMemoryAvailable(intdevice)constoverride; + +///ExportadescriptionofmemoryusedforPython +std::map<int,std::map<std::string,std::pair<int,size_t>>>getMemoryInfo() +const; + +std::pair<void*,size_t>getPinnedMemory()override; + +cudaStream_tgetAsyncCopyStream(intdevice)override; + +protected: +///HaveGPUresourcesbeeninitializedforthisdeviceyet? +boolisInitialized(intdevice)const; + +///AdjustthedefaulttemporarymemoryallocationbasedonthetotalGPU +///memorysize +staticsize_tgetDefaultTempMemForGPU(intdevice,size_trequested); + +protected: +///Setofcurrentlyoutstandingmemoryallocationsperdevice +///device->(allocrequest,allocatedptr) +std::unordered_map<int,std::unordered_map<void*,AllocRequest>>allocs_; + +///Temporarymemoryprovider,pereachdevice +std::unordered_map<int,std::unique_ptr<StackDeviceMemory>>tempMemory_; + +///Ourdefaultstreamthatworkisorderedon,onepereachdevice +std::unordered_map<int,cudaStream_t>defaultStreams_; + +///Thiscontainsparticularstreamsassetbytheuserfor +///ordering,ifany +std::unordered_map<int,cudaStream_t>userDefaultStreams_; + +///Otherstreamswecanuse,pereachdevice +std::unordered_map<int,std::vector<cudaStream_t>>alternateStreams_; + +///AsynccopystreamtouseforGPU<->CPUpinnedmemorycopies +std::unordered_map<int,cudaStream_t>asyncCopyStreams_; + +///cuBLAShandleforeachdevice +std::unordered_map<int,cublasHandle_t>blasHandles_; + +#ifdefinedUSE_NVIDIA_RAFT +///rafthandleforeachdevice +std::unordered_map<int,raft::device_resources>raftHandles_; + +/** +*FIXME:Integratingtheseinaseparatecodepathfornow.Ultimately, +*itwouldbeniceifweuseasimplememoryresourceabstraction +*inFAISSsowecouldpluginwhethertouseRMM'smemoryresources +*orthedefault. +* +*There'senoughduplicatedlogicthatitdoesn't*seem*tomakesense +*tocreateasubclassonlyfortheRMMmemoryresources. +*/ + +//managed_memory_resource +std::unique_ptr<rmm::mr::device_memory_resource>mmr_; + +//pinned_memory_resource +std::unique_ptr<rmm::mr::host_memory_resource>pmr_; +#endif + +///PinnedmemoryallocationforusewiththisGPU +void*pinnedMemAlloc_; +size_tpinnedMemAllocSize_; + +///Anotheroptionistouseaspecifiedamountofmemoryonall +///devices +size_ttempMemSize_; + +///Amountofpinnedmemoryweshouldallocate +size_tpinnedMemSize_; + +///WhetherornotwelogeveryGPUmemoryallocationanddeallocation +boolallocLogging_; +}; + +///DefaultimplementationofGpuResourcesthatallocatesacuBLAS +///streamand2streamsforuse,aswellastemporarymemory. +///Internally,theFaissGPUcodeusestheinstancemanagedbygetResources, +///butthisistheuser-facingobjectthatisinternallyreferencecounted. +classStandardGpuResources:publicGpuResourcesProvider{ +public: +StandardGpuResources(); +~StandardGpuResources()override; + +std::shared_ptr<GpuResources>getResources()override; + +///Disableallocationoftemporarymemory;alltemporarymemory +///requestswillcallcudaMalloc/cudaFreeatthepointofuse +voidnoTempMemory(); + +///Specifythatwewishtouseacertainfixedsizeofmemoryon +///alldevicesastemporarymemory.ThisistheupperboundfortheGPU +///memorythatwewillreserve.Wewillnevergoabove1.5GiBonanyGPU; +///smallerGPUs(with<=4GiBor<=8GiB)willuselessmemorythanthat. +///Toavoidanytemporarymemoryallocation,pass0. +voidsetTempMemory(size_tsize); + +///Setamountofpinnedmemorytoallocate,forasyncGPU<->CPU +///transfers +voidsetPinnedMemory(size_tsize); + +///Calledtochangethestreamforworkordering.Wedonotown`stream`; +///i.e.,itwillnotbedestroyedwhentheGpuResourcesobjectgetscleaned +///up. +///WeareguaranteedthatallFaissGPUworkisorderedwithrespectto +///thisstreamuponexitfromanindexorotherFaissGPUcall. +voidsetDefaultStream(intdevice,cudaStream_tstream); + +///Revertthedefaultstreamtotheoriginalstreammanagedbythis +///resourcesobject,incasesomeonecalled`setDefaultStream`. +voidrevertDefaultStream(intdevice); + +///Calledtochangetheworkorderingstreamstothenullstream +///foralldevices +voidsetDefaultNullStreamAllDevices(); + +///ExportadescriptionofmemoryusedforPython +std::map<int,std::map<std::string,std::pair<int,size_t>>>getMemoryInfo() +const; +///Returnsthecurrentdefaultstream +cudaStream_tgetDefaultStream(intdevice); + +#ifdefinedUSE_NVIDIA_RAFT +///Returnstherafthandleforthegivendevicewhichcanbeusedto +///makecallstootherraftprimitives. +raft::device_resources&getRaftHandle(intdevice); +#endif + +///Returnsthecurrentamountoftempmemoryavailable +size_tgetTempMemoryAvailable(intdevice)const; + +///SynchronizeourdefaultstreamwiththeCPU +voidsyncDefaultStreamCurrentDevice(); + +///Ifenabled,willprinteveryGPUmemoryallocationanddeallocationto +///standardoutput +voidsetLogMemoryAllocations(boolenable); + +private: +std::shared_ptr<StandardGpuResourcesImpl>res_; +}; + +}//namespacegpu +}//namespacefaiss +#pragmaGCCvisibilitypop diff --git a/xml/classfaiss_1_1gpu_1_1GpuIndex.xml b/xml/classfaiss_1_1gpu_1_1GpuIndex.xml index ec703a2b65..8c1c6f5c6d 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuIndex.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuIndex.xml @@ -19,7 +19,7 @@ - + const GpuIndexConfig @@ -33,7 +33,7 @@ - + size_t @@ -47,7 +47,7 @@ - + @@ -82,7 +82,7 @@ - + int @@ -96,7 +96,7 @@ - + std::shared_ptr< GpuResources > @@ -110,7 +110,7 @@ - + void @@ -128,7 +128,7 @@ - + size_t @@ -142,7 +142,7 @@ - + void @@ -165,7 +165,7 @@ - + void @@ -192,7 +192,7 @@ - + void @@ -224,7 +224,7 @@ - + void @@ -264,7 +264,7 @@ - + void @@ -308,7 +308,7 @@ - + void @@ -336,7 +336,7 @@ - + void @@ -368,7 +368,7 @@ - + void @@ -987,7 +987,7 @@ - + void @@ -1005,7 +1005,7 @@ - + bool @@ -1022,7 +1022,7 @@ - + void @@ -1051,7 +1051,7 @@ - + void @@ -1092,7 +1092,7 @@ - + @@ -1120,7 +1120,7 @@ - + void @@ -1146,7 +1146,7 @@ - + void @@ -1184,7 +1184,7 @@ - + void @@ -1222,7 +1222,7 @@ - + @@ -1422,7 +1422,7 @@ - + faiss::gpu::GpuIndexadd faiss::gpu::GpuIndexadd_with_ids diff --git a/xml/classfaiss_1_1gpu_1_1GpuIndexFlat.xml b/xml/classfaiss_1_1gpu_1_1GpuIndexFlat.xml index 7c5b8290e2..d86b4f35a6 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuIndexFlat.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuIndexFlat.xml @@ -47,7 +47,7 @@ - + const GpuIndexConfig @@ -61,7 +61,7 @@ - + size_t @@ -75,7 +75,7 @@ - + @@ -477,7 +477,7 @@ - + std::shared_ptr< GpuResources > @@ -491,7 +491,7 @@ - + void @@ -509,7 +509,7 @@ - + size_t @@ -523,7 +523,7 @@ - + void @@ -550,7 +550,7 @@ - + void @@ -582,7 +582,7 @@ - + void @@ -622,7 +622,7 @@ - + void @@ -666,7 +666,7 @@ - + void @@ -1112,7 +1112,7 @@ - + void @@ -1130,7 +1130,7 @@ - + diff --git a/xml/classfaiss_1_1gpu_1_1GpuIndexFlatIP.xml b/xml/classfaiss_1_1gpu_1_1GpuIndexFlatIP.xml index 174c3543db..38ba28b7b1 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuIndexFlatIP.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuIndexFlatIP.xml @@ -418,7 +418,7 @@ - + std::shared_ptr< GpuResources > @@ -432,7 +432,7 @@ - + void @@ -450,7 +450,7 @@ - + size_t @@ -464,7 +464,7 @@ - + void @@ -491,7 +491,7 @@ - + void @@ -523,7 +523,7 @@ - + void @@ -563,7 +563,7 @@ - + void @@ -607,7 +607,7 @@ - + void @@ -979,7 +979,7 @@ - + const GpuIndexConfig @@ -993,7 +993,7 @@ - + size_t @@ -1007,7 +1007,7 @@ - + @@ -1027,7 +1027,7 @@ - + void @@ -1045,7 +1045,7 @@ - + void diff --git a/xml/classfaiss_1_1gpu_1_1GpuIndexFlatL2.xml b/xml/classfaiss_1_1gpu_1_1GpuIndexFlatL2.xml index 58df4fd9b0..e097748e7d 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuIndexFlatL2.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuIndexFlatL2.xml @@ -418,7 +418,7 @@ - + std::shared_ptr< GpuResources > @@ -432,7 +432,7 @@ - + void @@ -450,7 +450,7 @@ - + size_t @@ -464,7 +464,7 @@ - + void @@ -491,7 +491,7 @@ - + void @@ -523,7 +523,7 @@ - + void @@ -563,7 +563,7 @@ - + void @@ -607,7 +607,7 @@ - + void @@ -979,7 +979,7 @@ - + const GpuIndexConfig @@ -993,7 +993,7 @@ - + size_t @@ -1007,7 +1007,7 @@ - + @@ -1027,7 +1027,7 @@ - + void @@ -1045,7 +1045,7 @@ - + void diff --git a/xml/classfaiss_1_1gpu_1_1GpuIndexIVF.xml b/xml/classfaiss_1_1gpu_1_1GpuIndexIVF.xml index 7d239213af..0e7771cf64 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuIndexIVF.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuIndexIVF.xml @@ -49,7 +49,7 @@ - + const GpuIndexConfig @@ -63,7 +63,7 @@ - + size_t @@ -77,7 +77,7 @@ - + @@ -565,7 +565,7 @@ - + std::shared_ptr< GpuResources > @@ -579,7 +579,7 @@ - + void @@ -597,7 +597,7 @@ - + size_t @@ -611,7 +611,7 @@ - + void @@ -634,7 +634,7 @@ - + void @@ -661,7 +661,7 @@ - + void @@ -693,7 +693,7 @@ - + void @@ -733,7 +733,7 @@ - + void @@ -777,7 +777,7 @@ - + void @@ -805,7 +805,7 @@ - + void @@ -837,7 +837,7 @@ - + void @@ -1687,7 +1687,7 @@ - + void @@ -1705,7 +1705,7 @@ - + diff --git a/xml/classfaiss_1_1gpu_1_1GpuIndexIVFFlat.xml b/xml/classfaiss_1_1gpu_1_1GpuIndexIVFFlat.xml index 6181fb90f8..b07aa8bf90 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuIndexIVFFlat.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuIndexIVFFlat.xml @@ -87,7 +87,7 @@ - + const GpuIndexConfig @@ -101,7 +101,7 @@ - + size_t @@ -115,7 +115,7 @@ - + @@ -781,7 +781,7 @@ - + std::shared_ptr< GpuResources > @@ -795,7 +795,7 @@ - + void @@ -813,7 +813,7 @@ - + size_t @@ -827,7 +827,7 @@ - + void @@ -850,7 +850,7 @@ - + void @@ -877,7 +877,7 @@ - + void @@ -909,7 +909,7 @@ - + void @@ -949,7 +949,7 @@ - + void @@ -993,7 +993,7 @@ - + void @@ -1021,7 +1021,7 @@ - + void @@ -1053,7 +1053,7 @@ - + void @@ -1645,7 +1645,7 @@ - + void @@ -1663,7 +1663,7 @@ - + int diff --git a/xml/classfaiss_1_1gpu_1_1GpuIndexIVFPQ.xml b/xml/classfaiss_1_1gpu_1_1GpuIndexIVFPQ.xml index c2a3324339..d5d3f64cb0 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuIndexIVFPQ.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuIndexIVFPQ.xml @@ -348,7 +348,7 @@ - + const GpuIndexConfig @@ -362,7 +362,7 @@ - + size_t @@ -376,7 +376,7 @@ - + @@ -1079,7 +1079,7 @@ - + std::shared_ptr< GpuResources > @@ -1093,7 +1093,7 @@ - + void @@ -1111,7 +1111,7 @@ - + size_t @@ -1125,7 +1125,7 @@ - + void @@ -1148,7 +1148,7 @@ - + void @@ -1175,7 +1175,7 @@ - + void @@ -1207,7 +1207,7 @@ - + void @@ -1247,7 +1247,7 @@ - + void @@ -1291,7 +1291,7 @@ - + void @@ -1319,7 +1319,7 @@ - + void @@ -1351,7 +1351,7 @@ - + void @@ -2046,7 +2046,7 @@ - + void @@ -2064,7 +2064,7 @@ - + int diff --git a/xml/classfaiss_1_1gpu_1_1GpuIndexIVFScalarQuantizer.xml b/xml/classfaiss_1_1gpu_1_1GpuIndexIVFScalarQuantizer.xml index 505c55b29d..293dc90d53 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuIndexIVFScalarQuantizer.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuIndexIVFScalarQuantizer.xml @@ -320,7 +320,7 @@ - + const GpuIndexConfig @@ -334,7 +334,7 @@ - + size_t @@ -348,7 +348,7 @@ - + @@ -979,7 +979,7 @@ - + std::shared_ptr< GpuResources > @@ -993,7 +993,7 @@ - + void @@ -1011,7 +1011,7 @@ - + size_t @@ -1025,7 +1025,7 @@ - + void @@ -1048,7 +1048,7 @@ - + void @@ -1075,7 +1075,7 @@ - + void @@ -1107,7 +1107,7 @@ - + void @@ -1147,7 +1147,7 @@ - + void @@ -1191,7 +1191,7 @@ - + void @@ -1219,7 +1219,7 @@ - + void @@ -1251,7 +1251,7 @@ - + void @@ -1880,7 +1880,7 @@ - + void @@ -1898,7 +1898,7 @@ - + int diff --git a/xml/classfaiss_1_1gpu_1_1GpuResources.xml b/xml/classfaiss_1_1gpu_1_1GpuResources.xml index efa8b6d470..e89727a5e1 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuResources.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuResources.xml @@ -16,7 +16,7 @@ - + void @@ -35,7 +35,7 @@ - + cublasHandle_t @@ -54,7 +54,7 @@ - + cudaStream_t @@ -73,7 +73,7 @@ - + void @@ -96,7 +96,7 @@ - + std::vector< cudaStream_t > @@ -115,7 +115,7 @@ - + void * @@ -134,7 +134,7 @@ - + void @@ -157,7 +157,7 @@ - + size_t @@ -176,7 +176,7 @@ - + std::pair< void *, size_t > @@ -191,7 +191,7 @@ - + cudaStream_t @@ -210,7 +210,7 @@ - + cublasHandle_t @@ -225,7 +225,7 @@ - + cudaStream_t @@ -239,7 +239,7 @@ - + size_t @@ -253,7 +253,7 @@ - + GpuMemoryReservation @@ -271,7 +271,7 @@ - + void @@ -289,7 +289,7 @@ - + void @@ -303,7 +303,7 @@ - + std::vector< cudaStream_t > @@ -317,7 +317,7 @@ - + cudaStream_t @@ -331,7 +331,7 @@ - + @@ -351,7 +351,7 @@ - + faiss::gpu::GpuResourcesallocMemory faiss::gpu::GpuResourcesallocMemoryHandle diff --git a/xml/classfaiss_1_1gpu_1_1GpuResourcesProvider.xml b/xml/classfaiss_1_1gpu_1_1GpuResourcesProvider.xml index 1486167160..db0b7ba05d 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuResourcesProvider.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuResourcesProvider.xml @@ -17,7 +17,7 @@ - + std::shared_ptr< GpuResources > @@ -33,7 +33,7 @@ - + @@ -59,7 +59,7 @@ - + faiss::gpu::GpuResourcesProvidergetResources faiss::gpu::GpuResourcesProvider~GpuResourcesProvider diff --git a/xml/classfaiss_1_1gpu_1_1GpuResourcesProviderFromInstance.xml b/xml/classfaiss_1_1gpu_1_1GpuResourcesProviderFromInstance.xml index 72e746ab95..a2f59d7ea7 100644 --- a/xml/classfaiss_1_1gpu_1_1GpuResourcesProviderFromInstance.xml +++ b/xml/classfaiss_1_1gpu_1_1GpuResourcesProviderFromInstance.xml @@ -16,7 +16,7 @@ - + @@ -35,7 +35,7 @@ - + @@ -48,7 +48,7 @@ - + std::shared_ptr< GpuResources > @@ -63,7 +63,7 @@ - + @@ -95,7 +95,7 @@ - + faiss::gpu::GpuResourcesProviderFromInstancegetResources faiss::gpu::GpuResourcesProviderFromInstanceGpuResourcesProviderFromInstance diff --git a/xml/classfaiss_1_1gpu_1_1StandardGpuResources.xml b/xml/classfaiss_1_1gpu_1_1StandardGpuResources.xml index 3c8b8045cb..4999a50704 100644 --- a/xml/classfaiss_1_1gpu_1_1StandardGpuResources.xml +++ b/xml/classfaiss_1_1gpu_1_1StandardGpuResources.xml @@ -16,7 +16,7 @@ - + @@ -31,7 +31,7 @@ - + @@ -44,7 +44,7 @@ - + std::shared_ptr< GpuResources > @@ -59,7 +59,7 @@ - + void @@ -73,7 +73,7 @@ - + void @@ -91,7 +91,7 @@ - + void @@ -109,7 +109,7 @@ - + void @@ -131,7 +131,7 @@ - + void @@ -149,7 +149,7 @@ - + void @@ -163,7 +163,7 @@ - + std::map< int, std::map< std::string, std::pair< int, size_t > > > @@ -177,7 +177,7 @@ - + cudaStream_t @@ -195,7 +195,7 @@ - + size_t @@ -213,7 +213,7 @@ - + void @@ -227,7 +227,7 @@ - + void @@ -245,7 +245,7 @@ - + @@ -277,7 +277,7 @@ - + faiss::gpu::StandardGpuResourcesgetDefaultStream faiss::gpu::StandardGpuResourcesgetMemoryInfo diff --git a/xml/classfaiss_1_1gpu_1_1StandardGpuResourcesImpl.xml b/xml/classfaiss_1_1gpu_1_1StandardGpuResourcesImpl.xml index 849abc8424..69dd083d64 100644 --- a/xml/classfaiss_1_1gpu_1_1StandardGpuResourcesImpl.xml +++ b/xml/classfaiss_1_1gpu_1_1StandardGpuResourcesImpl.xml @@ -17,7 +17,7 @@ - + std::unordered_map< int, std::unique_ptr< StackDeviceMemory > > @@ -31,7 +31,7 @@ - + std::unordered_map< int, cudaStream_t > @@ -45,7 +45,7 @@ - + std::unordered_map< int, cudaStream_t > @@ -59,7 +59,7 @@ - + std::unordered_map< int, std::vector< cudaStream_t > > @@ -73,7 +73,7 @@ - + std::unordered_map< int, cudaStream_t > @@ -87,7 +87,7 @@ - + std::unordered_map< int, cublasHandle_t > @@ -101,7 +101,7 @@ - + void * @@ -115,7 +115,7 @@ - + size_t @@ -128,7 +128,7 @@ - + size_t @@ -142,7 +142,7 @@ - + size_t @@ -156,7 +156,7 @@ - + bool @@ -170,7 +170,7 @@ - + @@ -185,7 +185,7 @@ - + @@ -198,7 +198,7 @@ - + void @@ -212,7 +212,7 @@ - + void @@ -230,7 +230,7 @@ - + void @@ -248,7 +248,7 @@ - + void @@ -271,7 +271,7 @@ - + void @@ -289,7 +289,7 @@ - + cudaStream_t @@ -308,7 +308,7 @@ - + void @@ -322,7 +322,7 @@ - + void @@ -340,7 +340,7 @@ - + void @@ -360,7 +360,7 @@ - + cublasHandle_t @@ -379,7 +379,7 @@ - + std::vector< cudaStream_t > @@ -398,7 +398,7 @@ - + void * @@ -417,7 +417,7 @@ - + void @@ -440,7 +440,7 @@ - + size_t @@ -459,7 +459,7 @@ - + std::map< int, std::map< std::string, std::pair< int, size_t > > > @@ -473,7 +473,7 @@ - + std::pair< void *, size_t > @@ -488,7 +488,7 @@ - + cudaStream_t @@ -507,7 +507,7 @@ - + cublasHandle_t @@ -522,7 +522,7 @@ - + cudaStream_t @@ -536,7 +536,7 @@ - + size_t @@ -550,7 +550,7 @@ - + GpuMemoryReservation @@ -568,7 +568,7 @@ - + void @@ -586,7 +586,7 @@ - + void @@ -600,7 +600,7 @@ - + std::vector< cudaStream_t > @@ -614,7 +614,7 @@ - + cudaStream_t @@ -628,7 +628,7 @@ - + @@ -648,7 +648,7 @@ - + @@ -672,7 +672,7 @@ - + @@ -704,7 +704,7 @@ - + faiss::gpu::StandardGpuResourcesImplallocLogging_ faiss::gpu::StandardGpuResourcesImplallocMemory diff --git a/xml/namespacefaiss_1_1gpu.xml b/xml/namespacefaiss_1_1gpu.xml index a0b31fedcb..c6e28ccf64 100644 --- a/xml/namespacefaiss_1_1gpu.xml +++ b/xml/namespacefaiss_1_1gpu.xml @@ -127,7 +127,7 @@ - + @@ -155,7 +155,7 @@ - + @@ -196,7 +196,7 @@ - + @@ -228,7 +228,7 @@ - + @@ -350,7 +350,7 @@ - + @@ -389,7 +389,7 @@ - + @@ -699,7 +699,7 @@ - + GpuIndex * @@ -717,7 +717,7 @@ - + bool @@ -735,7 +735,7 @@ - + bool @@ -753,7 +753,7 @@ - + std::string @@ -771,7 +771,7 @@ - + std::string @@ -789,7 +789,7 @@ - + AllocInfo @@ -811,7 +811,7 @@ - + AllocInfo @@ -833,7 +833,7 @@ - + AllocInfo @@ -859,7 +859,7 @@ - + int @@ -1928,7 +1928,7 @@ - + void @@ -1954,7 +1954,7 @@ - + idx_t @@ -1980,7 +1980,7 @@ - + diff --git a/xml/structfaiss_1_1gpu_1_1AllocInfo.xml b/xml/structfaiss_1_1gpu_1_1AllocInfo.xml index 8aed442b5b..a26e44872c 100644 --- a/xml/structfaiss_1_1gpu_1_1AllocInfo.xml +++ b/xml/structfaiss_1_1gpu_1_1AllocInfo.xml @@ -18,7 +18,7 @@ - + int @@ -33,7 +33,7 @@ - + MemorySpace @@ -48,7 +48,7 @@ - + cudaStream_t @@ -64,7 +64,7 @@ - + @@ -79,7 +79,7 @@ - + @@ -108,7 +108,7 @@ - + std::string @@ -122,7 +122,7 @@ - + @@ -142,7 +142,7 @@ - + faiss::gpu::AllocInfoAllocInfo faiss::gpu::AllocInfoAllocInfo diff --git a/xml/structfaiss_1_1gpu_1_1AllocRequest.xml b/xml/structfaiss_1_1gpu_1_1AllocRequest.xml index 90e50608dd..532d163f4d 100644 --- a/xml/structfaiss_1_1gpu_1_1AllocRequest.xml +++ b/xml/structfaiss_1_1gpu_1_1AllocRequest.xml @@ -18,7 +18,7 @@ - + AllocType @@ -33,7 +33,7 @@ - + int @@ -48,7 +48,7 @@ - + MemorySpace @@ -63,7 +63,7 @@ - + cudaStream_t @@ -79,7 +79,7 @@ - + @@ -94,7 +94,7 @@ - + @@ -115,7 +115,7 @@ - + @@ -148,7 +148,7 @@ - + std::string @@ -162,7 +162,7 @@ - + @@ -194,7 +194,7 @@ - + faiss::gpu::AllocRequestAllocInfo faiss::gpu::AllocRequestAllocInfo diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexBinaryFlatConfig.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexBinaryFlatConfig.xml index 65caf7af73..87ca800089 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexBinaryFlatConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexBinaryFlatConfig.xml @@ -17,7 +17,7 @@ - + MemorySpace @@ -32,7 +32,7 @@ - + bool @@ -47,7 +47,7 @@ - + diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexCagra.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexCagra.xml index e49f600c81..df289b83b7 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexCagra.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexCagra.xml @@ -16,7 +16,7 @@ - + std::shared_ptr< RaftCagra > @@ -30,7 +30,7 @@ - + std::shared_ptr< GpuResources > @@ -44,7 +44,7 @@ - + const GpuIndexConfig @@ -58,7 +58,7 @@ - + size_t @@ -72,7 +72,7 @@ - + @@ -105,7 +105,7 @@ - + void @@ -128,7 +128,7 @@ - + void @@ -146,7 +146,7 @@ - + void @@ -164,7 +164,7 @@ - + void @@ -179,7 +179,7 @@ - + std::vector< idx_t > @@ -192,7 +192,7 @@ - + int @@ -206,7 +206,7 @@ - + std::shared_ptr< GpuResources > @@ -220,7 +220,7 @@ - + void @@ -238,7 +238,7 @@ - + size_t @@ -252,7 +252,7 @@ - + void @@ -275,7 +275,7 @@ - + void @@ -302,7 +302,7 @@ - + void @@ -334,7 +334,7 @@ - + void @@ -374,7 +374,7 @@ - + void @@ -418,7 +418,7 @@ - + void @@ -446,7 +446,7 @@ - + void @@ -478,7 +478,7 @@ - + void @@ -986,7 +986,7 @@ - + void @@ -1013,7 +1013,7 @@ - + void @@ -1052,7 +1052,7 @@ - + void @@ -1070,7 +1070,7 @@ - + void @@ -1088,7 +1088,7 @@ - + @@ -1275,7 +1275,7 @@ - + faiss::gpu::GpuIndexCagraadd faiss::gpu::GpuIndexCagraadd_with_ids diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexCagraConfig.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexCagraConfig.xml index 6b06b8e60e..0768f99ddb 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexCagraConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexCagraConfig.xml @@ -17,7 +17,7 @@ - + size_t @@ -32,7 +32,7 @@ - + graph_build_algo @@ -47,7 +47,7 @@ - + size_t @@ -62,7 +62,7 @@ - + IVFPQBuildCagraConfig * @@ -76,7 +76,7 @@ - + IVFPQSearchCagraConfig * @@ -90,7 +90,7 @@ - + int @@ -105,7 +105,7 @@ - + MemorySpace @@ -120,7 +120,7 @@ - + bool @@ -135,7 +135,7 @@ - + @@ -180,7 +180,7 @@ - + faiss::gpu::GpuIndexCagraConfigbuild_algo faiss::gpu::GpuIndexCagraConfigdevice diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexConfig.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexConfig.xml index 38f21c0e84..5ee119349f 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexConfig.xml @@ -20,7 +20,7 @@ - + MemorySpace @@ -35,7 +35,7 @@ - + bool @@ -50,7 +50,7 @@ - + @@ -105,7 +105,7 @@ - + faiss::gpu::GpuIndexConfigdevice faiss::gpu::GpuIndexConfigmemorySpace diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexFlatConfig.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexFlatConfig.xml index 2172b0b695..a2b52e868f 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexFlatConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexFlatConfig.xml @@ -32,7 +32,7 @@ - + MemorySpace @@ -47,7 +47,7 @@ - + bool @@ -62,7 +62,7 @@ - + diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexIVFConfig.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexIVFConfig.xml index cdc55bd2d6..c32b3476bf 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexIVFConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexIVFConfig.xml @@ -64,7 +64,7 @@ - + MemorySpace @@ -79,7 +79,7 @@ - + bool @@ -94,7 +94,7 @@ - + diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexIVFFlatConfig.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexIVFFlatConfig.xml index 3e8b80de4c..033baa0f2f 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexIVFFlatConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexIVFFlatConfig.xml @@ -76,7 +76,7 @@ - + MemorySpace @@ -91,7 +91,7 @@ - + bool @@ -106,7 +106,7 @@ - + diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexIVFPQConfig.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexIVFPQConfig.xml index 12c2bab40f..7cb297add8 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexIVFPQConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexIVFPQConfig.xml @@ -122,7 +122,7 @@ - + MemorySpace @@ -137,7 +137,7 @@ - + bool @@ -152,7 +152,7 @@ - + diff --git a/xml/structfaiss_1_1gpu_1_1GpuIndexIVFScalarQuantizerConfig.xml b/xml/structfaiss_1_1gpu_1_1GpuIndexIVFScalarQuantizerConfig.xml index 4f5f8c7623..37d2073a2d 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuIndexIVFScalarQuantizerConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuIndexIVFScalarQuantizerConfig.xml @@ -76,7 +76,7 @@ - + MemorySpace @@ -91,7 +91,7 @@ - + bool @@ -106,7 +106,7 @@ - + diff --git a/xml/structfaiss_1_1gpu_1_1GpuMemoryReservation.xml b/xml/structfaiss_1_1gpu_1_1GpuMemoryReservation.xml index 4e3ba58b95..182df17d0e 100644 --- a/xml/structfaiss_1_1gpu_1_1GpuMemoryReservation.xml +++ b/xml/structfaiss_1_1gpu_1_1GpuMemoryReservation.xml @@ -15,7 +15,7 @@ - + int @@ -28,7 +28,7 @@ - + cudaStream_t @@ -41,7 +41,7 @@ - + void * @@ -54,7 +54,7 @@ - + size_t @@ -67,7 +67,7 @@ - + @@ -82,7 +82,7 @@ - + @@ -115,7 +115,7 @@ - + @@ -132,7 +132,7 @@ - + @@ -145,7 +145,7 @@ - + GpuMemoryReservation & @@ -162,7 +162,7 @@ - + void * @@ -175,7 +175,7 @@ - + void @@ -188,7 +188,7 @@ - + @@ -209,7 +209,7 @@ - + faiss::gpu::GpuMemoryReservationdata faiss::gpu::GpuMemoryReservationdevice diff --git a/xml/structfaiss_1_1gpu_1_1IVFPQBuildCagraConfig.xml b/xml/structfaiss_1_1gpu_1_1IVFPQBuildCagraConfig.xml index 06e61ad4fd..4acd4db2d5 100644 --- a/xml/structfaiss_1_1gpu_1_1IVFPQBuildCagraConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1IVFPQBuildCagraConfig.xml @@ -17,7 +17,7 @@ - + uint32_t @@ -32,7 +32,7 @@ - + double @@ -47,7 +47,7 @@ - + uint32_t @@ -64,7 +64,7 @@ - + uint32_t @@ -81,7 +81,7 @@ - + codebook_gen @@ -96,7 +96,7 @@ - + bool @@ -113,7 +113,7 @@ - + bool @@ -129,14 +129,14 @@ - + - + faiss::gpu::IVFPQBuildCagraConfigcodebook_kind faiss::gpu::IVFPQBuildCagraConfigconservative_memory_allocation diff --git a/xml/structfaiss_1_1gpu_1_1IVFPQSearchCagraConfig.xml b/xml/structfaiss_1_1gpu_1_1IVFPQSearchCagraConfig.xml index 9dea3ad71d..4b9c648de6 100644 --- a/xml/structfaiss_1_1gpu_1_1IVFPQSearchCagraConfig.xml +++ b/xml/structfaiss_1_1gpu_1_1IVFPQSearchCagraConfig.xml @@ -16,7 +16,7 @@ - + cudaDataType_t @@ -33,7 +33,7 @@ - + cudaDataType_t @@ -50,7 +50,7 @@ - + double @@ -68,14 +68,14 @@ - + - + faiss::gpu::IVFPQSearchCagraConfiginternal_distance_dtype faiss::gpu::IVFPQSearchCagraConfiglut_dtype diff --git a/xml/structfaiss_1_1gpu_1_1SearchParametersCagra.xml b/xml/structfaiss_1_1gpu_1_1SearchParametersCagra.xml index 0fd663b8f3..6c40684e17 100644 --- a/xml/structfaiss_1_1gpu_1_1SearchParametersCagra.xml +++ b/xml/structfaiss_1_1gpu_1_1SearchParametersCagra.xml @@ -17,7 +17,7 @@ - + size_t @@ -33,7 +33,7 @@ - + size_t @@ -48,7 +48,7 @@ - + search_algo @@ -63,7 +63,7 @@ - + size_t @@ -78,7 +78,7 @@ - + size_t @@ -93,7 +93,7 @@ - + size_t @@ -108,7 +108,7 @@ - + size_t @@ -123,7 +123,7 @@ - + hash_mode @@ -138,7 +138,7 @@ - + size_t @@ -153,7 +153,7 @@ - + float @@ -168,7 +168,7 @@ - + uint32_t @@ -183,7 +183,7 @@ - + uint64_t @@ -198,7 +198,7 @@ - + IDSelector * @@ -251,7 +251,7 @@ - + faiss::gpu::SearchParametersCagraalgo faiss::gpu::SearchParametersCagrahashmap_max_fill_rate