From c478401626ccd5a368987847003cf12e7c52c851 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:29:28 -0400 Subject: [PATCH] Set _hipscat_index as pandas index, where possible. (#415) * Set _hipscat_index as pandas index, where possible. * Fix bad merge. --- .../hipscat/abstract_catalog_loader.py | 11 ++++++++++- .../small_sky/Norder=0/Dir=0/Npix=11.parquet | Bin 8880 -> 5857 bytes tests/data/small_sky/_common_metadata | Bin 4018 -> 995 bytes tests/data/small_sky/_metadata | Bin 5130 -> 2107 bytes 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/lsdb/loaders/hipscat/abstract_catalog_loader.py b/src/lsdb/loaders/hipscat/abstract_catalog_loader.py index 3e655848..ed10f8f0 100644 --- a/src/lsdb/loaders/hipscat/abstract_catalog_loader.py +++ b/src/lsdb/loaders/hipscat/abstract_catalog_loader.py @@ -13,6 +13,7 @@ from hipscat.io.file_io import file_io from hipscat.pixel_math import HealpixPixel from hipscat.pixel_math.healpix_pixel_function import get_pixel_argsort +from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN from upath import UPath from lsdb.catalog.catalog import DaskDFPixelMap @@ -86,6 +87,9 @@ def _create_dask_meta_schema(self, schema: pa.Schema) -> npd.NestedFrame: dask_meta_schema = schema.empty_table().to_pandas(types_mapper=self.config.get_dtype_mapper()) if self.config.columns is not None: dask_meta_schema = dask_meta_schema[self.config.columns] + + if dask_meta_schema.index.name != HIPSCAT_ID_COLUMN and HIPSCAT_ID_COLUMN in dask_meta_schema.columns: + dask_meta_schema = dask_meta_schema.set_index(HIPSCAT_ID_COLUMN) return npd.NestedFrame(dask_meta_schema) def _get_kwargs(self) -> dict: @@ -104,6 +108,11 @@ def read_pixel( **kwargs, ): """Utility method to read a single pixel's parquet file from disk.""" - return file_io.read_parquet_file_to_pandas( + dataframe = file_io.read_parquet_file_to_pandas( hc.io.pixel_catalog_file(catalog.catalog_base_dir, pixel, query_url_params), columns=columns, **kwargs ) + + if dataframe.index.name != HIPSCAT_ID_COLUMN and HIPSCAT_ID_COLUMN in dataframe.columns: + dataframe = dataframe.set_index(HIPSCAT_ID_COLUMN) + + return dataframe diff --git a/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet b/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet index e0cb8d948d9a14a7ac5937856204a9fa48684fad..e4aa453abde7ff8812f203dbeccceb3c0690a711 100644 GIT binary patch delta 1076 zcma)6-Ahwp7~jrzoQ~Btr(-7<9v8i#HaBPHM>%`X=7!eHjcQH^T2tpI+Dhl_VkHq? zB!N$OrG^j@eNos&bS+&31qF$8RS?ltSN#D!?|b}!(S;A3_xb+#JrA5$9Y5Uq9vZ7( ze`frm7oOT6Z$7GjVj;!w@t^}1Y^AUDf==F&o#lF|QB!1*9KvvaxnRF5*#Fw>b0+fg zFxf1FHH&{erF*9p=BuGnl*C7bEV&k~{`H&UEv>NO1!_Gces`1iHE_l12eZ)vBZdpY zhXz=)R*G+Y1MuG**bKZRF2sxNSTRU(IEZv`FyN zJYz&al2ckH|Jgtl*$4dg0_5$5pxX<<`gNoLDOw7k9@!VX+CpeG@X%(?I~^t(i=B_R zUQZ1Tj3*siqKDB;IzTCn;>YMLrCF+^X^7G^4WzkEQ7DZt2?IMBM}3E*Hp)B`tU`iz zpqNEB66!g|yV!#j&B(f$%zFhQ5SQx?C^^rx$HSYS8-Yc++B81h2#26rxyb#vN=n1Au|zf)-)r6gMl_*<3I#V>r{J7*@(9up_0&j=3*FMP37iyVE z_A0XpU&!0*8=LA2Upm3V@K{(6OC(TLFQ(et+B6P%GMTwHHE_e7nw)eeGa1OaOopa9 XZ=F|KhWjpu$*Iw3iVPYJ`&Ik{D{Cx8 literal 8880 zcmd5?3wV>|z5mjN@&y`fX%kZ@SlbdJ6i7l_8W5W=X`3eWLJMuuBGTO2q$FwDG`%4+ zvW+3k$-Hnlct;=YVLTj%h#(h(GQ=Ajg3u{b6cjzxZ(NDytMOGL|KMAT@CT1(Tf$56v~d`axE;e6_lFoBv3nkl5I zvqA|Ki}pvLPr>uS)4;y~Ga>sc=mdB<_*c;$1Ns;=4m1t&(~!RdoCu_Ve+Ev1?f^&$ zx(fOL@=Kt2VH!0Yco{ZhAoeF*#$Xa@Ry4x2xsy$^H* z^aW@NZ2ks$4(K~j4`f1&7exCU+J`|h^i2RBgZxY2Xy6{`M}ocu-44otJQ=hTwkhDx zfF^*3K^6zv5BWF1Z1fcY-vFjVzYBB}{JWs9Ks=$0dLOb`AOZL*uuDYy51==}r-6SB z?a$Eu8uS+E@1VV)??ER)HwlX=G4M9f+weOI_UA$W2R;V3-Tsdliv2qkLJ=ieA0yfjlSS4_f+$WDmnxy}h~30I#G7pq{&%RrcRS9l+&|wa%beJX3n~O_8s|ZMx!k#EYcP04RcCL%gX0g zRL+}URc$oY)GoMl;a!XB%oeN7UfXqwyLym6k4G2#)V$#|MYjrQY8$G4S(w0=`2`XzADUVVT0t;ThTjelq4?O-Vqq#5Bq$=wP=&w{RPaWK-hNw5rP;5JsiAN*6S-CB5)ae+d=nzePnwi|XU;4crl9#${!%|X`fQ?nIb;Zvg? zAMr#dc@ZmzRes@DOK8rlSxr_>v=ox2^PbSciOW!@(XkcDMa^mVZI<8vH8nT0^zgiz z1CP}Gx7hXmUiZS|OOG?jI?2lQGbWu}d%s=1_H)%!`#UuM^GfC9x3}Csc~^PP9=UhV z{N%T9$|*e**Rd1f&_lB1dBk8;;R!SRH;D156*#^fe6`Ep%%*cYCZJD0`$~DR|L|6G!Up67zPvb za0aLoy$`0M5z)Kaej-;|Fj8XWkxU^AF4l z4n4a8RWMX0WEL(?gcO7HpIr*x)8hFRlGpF0f_0h9)3UcV%SxKF4;)fIkW~2Iv+4r{ z6;<}CBcIjHD{_u~qw4W*?yZxJPuQDxaJnHS=Qj@)RerXw>fs{ynm_xi8y>%_^R5Q} z)5o3bPJOv>{JO5&x;IRgk!>lxIJR$gV{LXTH@&-Q@zr6{)hW_TqTcC^h;@hz2A$pn zQF4ihyHe>?wo>>(rNp8!(YpYuP@T{Udx?;O87T{G0^af%O8VXSzH^IiF)y;Wv^2GZ z^5BX0@uVlmNwrSPmxlmT5uM1zH0j>7pPUFSitn8eemu$%JsG)_ zj87C2_7{!Ff*;FF6~%;3gt)S%7Mnd(?w^U5^Q2cZq(?GR2MqBODvFV~1BL_?Zbb#P zd}3eqVhfz1h}Q7LPw=E`rP41ZkdVUN`wjHNp^bzy@)xye$%ws5Nw@{e$a?L(tuHiw znb+CDElw`B12X9oGB&nBMjA9WJX5dU)bE*EM^S;h@4kBO1>neCWq$(($6LCtyucIE zc@(8iIQZbq-$R$NOPgL&rcvI>!ACHcI;=`;~9-SqeU)~9+`bnSv zQAicvn4ja(j@(yIe+ntpb;s;Wq;K0BrK-)a8h-HUVRQ+zYF<6KVC+k6E+Tc%?d|CM z^{=QtIt#%;nC+8teX132c!?R`mGOvl;_fxW5B?Sv)`W}V?49! zgYOAHy8PJ@FM`)+YLbD$b>hu;5_}@=-`E`JM9y>i1t+kk-u*ZM*;^RBb3H0MHZo=MKu)JU^kCw;&Vr39YwPEFo@^9-BRMPQH^QUY{hrG*fzHQs@ki z%5AW7GG${9C5iJ{9d@@_G#Vcu6Jp3`4J8^u8ZBn7LEb!^F8p-h*uEPW-!bSBy67f| zOExCaqcO$ofaIo`kP4AHEMiai0kQ#k^=a{mm6DqB*p2=6y1`HQ8~uj-mpt&K|B~02 z^twEqq}TDSBfXxF6X}iF%i{Oc*e6n2{HZdTqEfOXabB~>X7)+W36`X@2gM?5lgsb+ z_|j*lEl7_%L(&&!rKNLf?ohF_R!h=7X15*o9>2>4i+ZQsWm^;} z)6!yY=_H4Nx6_P&qG%_%x7k~K&L$6$Dzc}_rz+C1VSATOKcy17>FG-;Njy_oS#F%k zJ!>Qzlctfsu7+ZUVJbl6Zw|v`Gnxjbh+!HSZ38RQXd?9OL|7QsYb#(&c>J-WuzEei zv@_@jWCsEE8r7hsX|FNnEUqz?GE8|M`f6tA-7bHvt~%SKENinASGQU`rOtAv#-ewr zn#?9mSBgTnSxchv03=S^0T5qkfW3fru zTT{kqbr-VZSan61v!bs4b*XiCwAGj@lx18FsK27!Ufn;=TAu}T`A@~-u5{IO4poz| zZm@ZDTXZVNwdyh;-qu>oA##$}gx5%iszgiHXaD?*>T6AgPRn#XTZgE%r8RWcRI6N8 zkD<-#WaAzzHm}8Vtvb2-&1dBLvUsX}mSU`<;)r##80+ze;)=2vYz`yVil@5OTI}k- zH~Po@qv|!^t?IunT^vO>W=}(#7%~pRGvw0b#wU%?c=J6PubpMJ&EzU)gDM~3U&Bhd`d^gSJr6_M(2 zUMoUpBk@wuh)!JjxK8s!e5ih!-uDI>i~NyJn!fXui`0isBz}geB0t)u5U*i62+v9T z9V^yqiLQY(C6vN|NR^w>10hty8RC?-ceCm==2ErE$-{l*dvjYN-FH N_wkQRbMgPG{6A|>(+~gv diff --git a/tests/data/small_sky/_common_metadata b/tests/data/small_sky/_common_metadata index 4cf7a744a1681971dbfc1dc15ea897e169977525..8b9d1c7542b2325ea17d1cea09bf2a88d13969c0 100644 GIT binary patch literal 995 zcmah|&2HL25MBX|(kSX76_JrRaN$%XCTXiAl_GW41_TWhP+Bk-#)QVbl1iL$b-^_lq?!n-wSSUTc7uwdsnR+w3xF?E*YEFAKnFi6qvF|3q z-1Q!+<%_4+ANPb%-dx~{;tq!8Vp|>ZuIDJDpN>h6ouZmF#8!eH=u_(3(R{d%F3Jop9x^zl#f2_OOKLj%AE!)Ke6 z03AAEV2gIt*E?$0?|EOzhc{F zlj4aOfJY=LTQ4DFrXJm2UXO>}tMRCvetS#Q$QDscf$)VE2`hblUq@MKWnn&@StEl1 zaMi;sE{Eelao2jk!I>EBd*J@23UO)Uk3Joq#%<^$0T z=xp*=eE&5+8gnp_kNJsuAKVn*ABT=}LOv1K0h1g!>rjKglEVM`R z(K>F_M#nezIG=YMhOZ6r5Q3gg=HCw}II=;7!68x@594gdfE literal 4018 zcmcInTW{J}5JqiPwX14ZYL$veeTXdeY1;+}bVH?;_SgYqHX((kwsEzJd;?5uE&7wTCfjJ9E<=Z^HPW>y%t8G>ZUT1trD+0-Hpv-Vaw8%EP; ztsi7c{)TTX(X$rQ8bH=}G|6fyYi~*4Fg6BHj!}Nyzm$BNe^8=XO9^p{nQh64b{PFO zLsCSz(rdcf{S~@=r*obFRE&eLDa3*&#mH{RSbV4gdK3PDq*XeNrc+PlP_D z@%$Qpio{yV7uq0;Yj~&@%9jY464(ddR0(`9gu3v?PrFLxs;iz6a()bZ`ClZ@9c!Xo zR!gmk(U4=qKXc9<-jLktNLTsmZmC{SOH4*`3y~x=hel~uG?YqD2VI&vaLu{&R;tIK zt1>W@kGOwqDYNKQAdcp_t|4}MhF6cZ4aS6>p6C}jt>53d__3F5*6kU{q7+q0&K0h$L&cG~I3%1HHcw_6N zo8Zafg%}lD!?uI5Fci7aRXPJptfevPeV8Kdr7gcXV7_UArp&I?(junBF})hqV~TY+ zv-8kBgi}?WYFHUWF6{i*%aF+iHi`Q|x^Ib7XdE3s>GsNoPwzzEIjWsw! z@Tt&Eb7WRM6)-bNi|jBg=jA)m4UlOCFt@qRaf=9hcp;#^Sc% z-|4K>(jYJ!ts(TXq_6J8*3(6IYL;39t6mMDAGtQG`Tr+wN#8U*8TwXrp7hyBSD@C^ zW`*`gpRd3>)TK#t!Sdv}S#l?agFRT9wS@BYbXaq+*GbEGmY|leb)`C{I)>8tOekMV zet`%$&rWfS@e$6pHXU<)Sf9{I=iyKj>AX9}S75-zyZHI=5SKN94n+Zp$$X@*W%%T? zu>)#v0`s_lLPGfv-cgVYa7}P%GX40H=)3Uwz@|&#Gmdf!>U_gKPpND^Q(2g~gsoFEZ; z(vY+a5JmeCj}-seR{Yd-qsdML{09FaF4TXzAA$ka`*?o>*D3Dy^3ZnqgQ4RC4L(61 z91iTC;=L((s6XV_T>c=?!_Q+oer*Q9YkCPhNG^E??E1I==Zjo!*I(|hpK`hP@N4}a DYK`T} diff --git a/tests/data/small_sky/_metadata b/tests/data/small_sky/_metadata index 26df207b47e12572da8ef6e66e909f2e2d10490f..7134c1428417e04faff3eef8c9183097d70731d7 100644 GIT binary patch delta 890 zcmZ`$O=uHQ5Z-JyHI$a7jaibik{IyOR?NoK{%Wad zh!hbIjZb=#f}kQ=3Iz{>ARdZ`9`sgfEuYWb=g}03`4yy9`hUtpH(w0t5^Co~8lB z2qAHhJjVMTqbbUiNFfaa2+Np8!?;2|N((ABpoJs>FXoec05ItSEr!+dEEu;fs|43l^Rwgohj&k%+sD~11#7E`7(25njoPz_?D4MkYa_N-Hq&)@^Nzq)FE_;6iU#BDJhUnT*W8B2e~^^PoGV5DP9WKTdMt#g5w0Ejjoa7hJeBAD%ZfE_faXZJ7Rv6ySMHNAZ)wEcJ~@EXT4t8*Jh`_c-C);-Ze&M}UrWZ&Iy z7*0o(yZeT%>0{c^!};ZvK z>EcT8{1Cn^Gx$H85B+D6|NHm&&t3t1iupxv-5tGuZyoXd(lg;d`ThtWmy8kkBoKP~ z691cJ{;S}Sz|gqKlKPqNY6yze?J)I-`goJi@DHebNs(w=l}TnkOk(Hg$^T+ z#f0=sEFMKhEJho2oW_$?{?Du5HjRJ0xe}arcY}K_7>8el9?tPk-{3!gW90^Sc>xzc zOy6gU_Ti64(c(d47{8qheK^Pej_1F)gdkp;M+rX@*vCH}fzcy8j`r8f{D+q*+IKJ} zc*23<=uBw;y*)ykDh>?yFH51{FY#a9;y?e18|8ZT(fb!a!6@H*_uZ#oel!C&^tW{= z3w$8-ulPj{^x;KEwl%pE{_^Gf5n6fn)t1?_>`vs?>MtXcMJe+3mDLDSvnr)itNRgK zwsf$!d!`8%HA6SG{Ye=^+3yYr6YLv3k#1g!0zbN+Kcd%LQS|~LSrhf+ zjpS2cJ&5Ec+E2#kiBF3UDv}?`70!T}wHXnI$xj2qyMP~QD1D(&QzgN7(){{w3{Ovb zTbH}7HmwGxp|s`p0E>Y$kl{ks$Jh_`cE@PhD2*kqMX$voaA1d9XG|Ff-Sx-=E_{I$ z3cICS?EV*iytIbTaea#rl868J{DP1K5$YsONS%mvDieeWeIJFJo?gs@Eq=p8y->YG zNT0ww_@z$ZcW6|BkSr`gQOZQ_4cJe7%rVGw7EX zN<4e7TFOP4K4xl}G{jSANX=GVA7o9%${kiSz!y1**v6CN(|N7q<+*)8rCd*Jf4@Tb#z!M z=i@mR1LQx}*Nfvks~rW_@@sLi3T9<+QcgVGtaY@MOsa8OUZ(hVtFVR;Q?%mkk)zbM zi2HLq{&cNc&JL9863v5eZ;9DKrI<2RJA0@al=m!dj$)sdCv$8)C-;|P7dy%p?4zxT zeRBZ&@mailHnY}nVz1c6uDWH8pN(Z<#JveDUP3ULsZM%q=2{zxqx3v=H zs{Z_Y`=H>s>*7CSBi*bui#;PZlltd5X9vl?TxQ&7?4w;ZW3sqU&dbSXjpc2qf3CGE z#~Y5im1{yRbL;Af+-h>h?5puyLrbR|s7I#G&iuENH@9x8RuSq}YT(w{j$DGXR?a4A zeYAxnjNx3WbT4RDaiGS{L&d-vEFI;Y|3Sc zL_IvRgY!}0;?br}BcmMiVBn^sz~M?zfviVR+v_U-;l8VN_ok(+*5R{)F7cm)LT@1#*PnOga|N1`44j zHAzkZQ^bdSc>H%}@~6;SS+*h&H^dKlq4CrCkPJYt