Difference between revisions of "ZEST v1.0"

From Public PIC Wiki
Jump to navigation Jump to search
m (Tallada moved page ZEST to ZEST v1.0)
Line 170: Line 170:
 
SELECT * FROM tallada.zest
 
SELECT * FROM tallada.zest
 
;
 
;
 +
</pre>
 +
 +
 +
== UPDATE ==
 +
 +
I update the catalog to include the cosmos_2015_dr_2_1 index to be able to make joins directly in COSMOHUB.
 +
 +
To do the matching I use a python notebook which can be found here:
 +
 +
> /nfs/pic.es/user/j/jcarrete/python_notebooks/Matching_ZEST_and_COSMOS2015.ipynb
 +
 +
I create a table with the "link" (`sequentialid`, `cosmos2015_id):
 +
 +
<pre>
 +
CREATE EXTERNAL TABLE jcarrete.zest_cosmos2015_spatial_matching_csv (
 +
sequentialid int,
 +
ra float,
 +
dec float,
 +
cosmos2015_id bigint,
 +
cosmos2015_distance float
 +
)
 +
ROW FORMAT SERDE
 +
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
 +
WITH SERDEPROPERTIES (
 +
  'field.delim'=',',
 +
  'serialization.format'=',')
 +
STORED AS INPUTFORMAT
 +
  'org.apache.hadoop.mapred.TextInputFormat'
 +
OUTPUTFORMAT
 +
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
 +
LOCATION
 +
  'hdfs://namenode01/user/jcarrete/data/zest_cosmos2015_spatial_matching/'
 +
;
 +
</pre>
 +
 +
This is the ZEST table updated, which include the COSMOS2015 id:
 +
 +
<pre>
 +
CREATE TABLE cosmohub.`zest_v1_0_cosmos2015_join`(
 +
  `sequentialid` int COMMENT 'Identification number',
 +
  `cosmos2015_id` bigint COMMENT 'id from COSMOS2015 DR2.1 catalog to be able to JOIN both catalogs',
 +
  `cosmos2015_distance` float COMMENT 'estimated distance from the nearest galaxy when using sklearn.neighbors.KDTree (ra, dec)',
 +
  `ra` float COMMENT 'Right ascension (degrees)',
 +
  `dec` float COMMENT 'Declination (degrees)',
 +
  `capak_id` float COMMENT 'ID based on ground-based photometric catalog of Capak 2006',
 +
  `capak_ra` float COMMENT 'Closest matched right ascension to ground-based catalog within 0.6" (degrees)',
 +
  `capak_dec` float COMMENT 'Closest matched declination to ground-based catalog within 0.6" (degrees)',
 +
  `acs_mag_auto` float COMMENT 'Total magnitude of object- used for catalog selection',
 +
  `acs_magerr_auto` float COMMENT 'Magnitude error',
 +
  `acs_x_image` float COMMENT 'X-pixel position on ACS-tile',
 +
  `acs_y_image` float COMMENT 'Y-pixel position on ACS-tile',
 +
  `acs_xpeak_image` float COMMENT 'X-pixel position of object peak flux on ACS tile',
 +
  `acs_ypeak_image` float COMMENT 'Y-pixel position of object peak flux on ACS tile',
 +
  `acs_alphapeak_` float COMMENT 'RA of object peak flux (degrees)',
 +
  `acs_deltapeak_` float COMMENT 'Dec of object peak flux (degrees)',
 +
  `acs_a_image` float COMMENT 'SExtractor semi-major axis',
 +
  `acs_b_image` float COMMENT 'SExtractor semi-minor axis',
 +
  `acs_theta_image` float COMMENT 'SExtractor position angle',
 +
  `acs_elongation` float COMMENT 'SExtractor a/b [semi-major axis / semi minor axis]',
 +
  `acs_class_star` float COMMENT 'SExtractor stellarity parameter',
 +
  `acs_ident` float COMMENT 'Unique number that identifies object',
 +
  `acs_se` float COMMENT '0 = Detection from \'cold\' pass of Sextractor, 1 = Detection from \'hot\' pass of Sextractor',
 +
  `acs_mu_class` float COMMENT 'Type of object (1 = galaxy, 2 = star, 3 = spurious)',
 +
  `acs_overlap` float COMMENT 'Object overlap flag. All objects here have overlap =1 i.e. no overlap.',
 +
  `acs_nearstar` float COMMENT 'Object in star mask flag. All the objects in this morphological catalog have "nearstar = 1" which means they are not in a star mask.',
 +
  `acs_mask` float COMMENT 'Object in automatic mask flag: 2 = star used to make automatic mask',
 +
  `acs_masked` float COMMENT 'Object in manual mask flag: -1 = manual mask (astrophysical obj), 0 = manual mask (artifact or image defect), 1 = not in manual mask',
 +
  `acs_clean` float COMMENT 'Object useable flag: 0 = do not use this object, 1 = use this object',
 +
  `acs_unique` float COMMENT 'Object unique flag. All the objects in this morphological catalog have "unique = 1" which means they are all "unique" in the original ACS catalog.',
 +
  `gg` float COMMENT 'ZEST Gini coefficient',
 +
  `m20` float COMMENT 'ZEST second order moment of the 20% brightest pixels',
 +
  `cc` float COMMENT 'ZEST concentration',
 +
  `aa` float COMMENT 'ZEST asymmetry',
 +
  `r20` float COMMENT 'ZEST semi-major axis length of ellipse encompassing 20% of total light (pixels)',
 +
  `r50` float COMMENT 'ZEST semi-major axis length of ellipse encompassing 50% of total light (pixels)',
 +
  `r80` float COMMENT 'ZEST semi-major axis length of ellipse encompassing 80% of total light (pixels)',
 +
  `rpet` float COMMENT 'Zest semi-major axis length for Petrosian Radius ellipse (pixels)',
 +
  `flagrpet` float COMMENT 'Petrosian radius flag: 0 = single Petrosian radius measured, 2 = multiple radii measured (noisy SB profile), 5 = SB profile was corrected for diffuse light contribution from neighboring object(s)',
 +
  `flux_gim2d` float COMMENT 'GIM2D total flux (counts)',
 +
  `le_flux_gim2d` float COMMENT 'LE_FLUX_GIM2D (99% conf. lower error on R_GIM2D)',
 +
  `ue_flux_gim2d` float COMMENT 'UE_FLUX_GIM2D (99% conf. lower error on R_GIM2D)',
 +
  `r_gim2d` float COMMENT 'GIM2D psf-convolved half-light radius of object (arcseconds)',
 +
  `le_r_gim2d` float COMMENT '99% conf. lower error on R_GIM2D',
 +
  `ue_r_gim2d` float COMMENT '99% conf. upper error on R_GIM2D',
 +
  `ell_gim2d` float COMMENT 'GIM2D ellipticity = 1-b/a of object',
 +
  `le_ell_gim2d` float COMMENT '99% conf. lower error on ELL_GIM2D',
 +
  `ue_ell_gim2d` float COMMENT '99% conf. upper error on ELL_GIM2D',
 +
  `pa_gim2d` float COMMENT 'GIM2D position angle of object - cw from +y-axis (degrees)',
 +
  `le_pa_gim2d` float COMMENT '99% conf. lower error on PA_GIM2D',
 +
  `ue_pa_gim2d` float COMMENT '99% conf. upper error on PA_GIM2D',
 +
  `dx_gim2d` float COMMENT 'x-offset of GIM2D-model center from ACS-coordinate center (arcseconds)',
 +
  `le_dx_gim2d` float COMMENT '99% conf. lower error on DX_GIM2D',
 +
  `ue_dx_gim2d` float COMMENT '99% conf. upper error on DX_GIM2D',
 +
  `dy_gim2d` float COMMENT 'y-offset of GIM2D-model center from ACS-coordinate center (arcseconds)',
 +
  `le_dy_gim2d` float COMMENT '99% conf. lower error on DY_GIM2D',
 +
  `ue_dy_gim2d` float COMMENT '99% conf. upper error on DY_GIM2D',
 +
  `sersic_n_gim2d` float COMMENT 'GIM2D Sersic index',
 +
  `le_n_gim2d` float COMMENT '99% conf. lower error on SERSIC_N_GIM2D',
 +
  `ue_n_gim2d` float COMMENT '99% conf. upper error on SERSIC_N_GIM2D',
 +
  `r_0p5_gim2d` float COMMENT 'GIM2D half-light radius of object without PSF convolution (arcseconds)',
 +
  `chi_gim2d` float COMMENT 'reduced CHI2 of the best GIM2D model fit',
 +
  `iter_gim2d` float COMMENT 'no. of iterations required for GIM2D fit to converge',
 +
  `pc_1` float COMMENT 'ZEST first principal component',
 +
  `pc_2` float COMMENT 'ZEST second principal component',
 +
  `pc_3` float COMMENT 'ZEST third principal component',
 +
  `type` float COMMENT 'ZEST Type CLASS (1 = Early type, 2 = Disk, 3 = Irregular Galaxy, 9 = no classification)',
 +
  `bulg` float COMMENT 'ZEST "Bulgeness" CLASS - only for Type 2 (disk) galaxies: 0 = bulge dominated galaxy, 1,2 = INTermediate-bulge galaxies, 3 = pure disk galaxy, 9 = no classification',
 +
  `irre` float COMMENT 'ZEST Irregularity CLASS - only for Type 1 (elliptical) galaxies: 0 = regular, 1 = irregular, 9 = no value',
 +
  `elli` float COMMENT 'ZEST Ellipticity CLASS, 0 = fac-on, 1,2 = increasingly higher elongation, 3 = edge-on, 9 = no classification of elongation',
 +
  `stellarity` float COMMENT 'Visual Stellarity flag: 0 if ACS_CLASS_STAR<0.6 (object is ASSUMED to be a galaxy, no visual inspection), 0 if ACS_CLASS_STAR>=0.6 AND object visually identified as a galaxy, 1 if ACS_CLASS_STAR>=0.6 AND visually identified as a star, 2 if ACS_CLASS_STAR>=0.8 (object is assumed to be a star and was not visually inspected), 3 if ACS_CLASS_STAR<0.6 but object is visually identified as a star (e.g. saturated star, etc)',
 +
  `junkflag` float COMMENT '0 = good object, 1 = spurious',
 +
  `acstile` string COMMENT 'ACS TILE number from which the object is extracted')
 +
CLUSTERED BY (
 +
  sequentialid)
 +
INTO 4 BUCKETS
 +
ROW FORMAT SERDE
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
 +
STORED AS INPUTFORMAT
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
 +
OUTPUTFORMAT
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
 +
;
 +
</pre>
 +
 +
I materialize the JOIN:
 +
 +
<pre>
 +
INSERT OVERWRITE TABLE cosmohub.`zest_v1_0_cosmos2015_join`
 +
SELECT zest.`sequentialid`, link.`cosmos2015_id`, link.`cosmos2015_distance`, zest.`ra`, zest.`dec`, zest.`capak_id`, zest.`capak_ra`, zest.`capak_dec`, zest.`acs_mag_auto`, zest.`acs_magerr_auto`, zest.`acs_x_image`, zest.`acs_y_image`, zest.`acs_xpeak_image`, zest.`acs_ypeak_image`, zest.`acs_alphapeak_`, zest.`acs_deltapeak_`, zest.`acs_a_image`, zest.`acs_b_image`, zest.`acs_theta_image`, zest.`acs_elongation`, zest.`acs_class_star`, zest.`acs_ident`, zest.`acs_se`, zest.`acs_mu_class`, zest.`acs_overlap`, zest.`acs_nearstar`, zest.`acs_mask`, zest.`acs_masked`, zest.`acs_clean`, zest.`acs_unique`, zest.`gg`, zest.`m20`, zest.`cc`, zest.`aa`, zest.`r20`, zest.`r50`, zest.`r80`, zest.`rpet`, zest.`flagrpet`, zest.`flux_gim2d`, zest.`le_flux_gim2d`, zest.`ue_flux_gim2d`, zest.`r_gim2d`, zest.`le_r_gim2d`, zest.`ue_r_gim2d`, zest.`ell_gim2d`, zest.`le_ell_gim2d`, zest.`ue_ell_gim2d`, zest.`pa_gim2d`, zest.`le_pa_gim2d`, zest.`ue_pa_gim2d`, zest.`dx_gim2d`, zest.`le_dx_gim2d`, zest.`ue_dx_gim2d`, zest.`dy_gim2d`, zest.`le_dy_gim2d`, zest.`ue_dy_gim2d`, zest.`sersic_n_gim2d`, zest.`le_n_gim2d`, zest.`ue_n_gim2d`, zest.`r_0p5_gim2d`, zest.`chi_gim2d`, zest.`iter_gim2d`, zest.`pc_1`, zest.`pc_2`, zest.`pc_3`, zest.`type`, zest.`bulg`, zest.`irre`, zest.`elli`, zest.`stellarity`, zest.`junkflag`, zest.`acstile`
 +
FROM cosmohub.zest_v1_0 as zest
 +
JOIN jcarrete.zest_cosmos2015_spatial_matching_csv as link
 +
ON zest.`sequentialid` = link.`sequentialid`
 +
;
 +
</pre>
 +
 +
 +
And here is a test:
 +
<pre>
 +
SELECT zest.sequentialid, zest.cosmos2015_id, cosmos.id, zest.ra, cosmos.alpha_j2000, zest.dec, cosmos.delta_j2000, zest.cosmos2015_distance
 +
FROM cosmohub.`zest_v1_0_cosmos2015_join` as zest
 +
JOIN cosmohub.cosmos_2015_dr_2_1 as cosmos
 +
ON zest.cosmos2015_id = cosmos.id
 +
LIMIT 5;
 
</pre>
 
</pre>

Revision as of 10:09, 14 October 2019

Data

Downloaded on 2019-02-18 from https://irsa.ipac.caltech.edu/data/COSMOS/tables/morphology/cosmos_morph_zurich_1.0.tbl

Converted to CSV using LibreOffice Calc.

SQL

CREATE EXTERNAL TABLE tallada.zest (
    `SequentialID` INT COMMENT "Identification number",
    `RA` FLOAT COMMENT "Right ascension (degrees)",
    `DEC` FLOAT COMMENT "Declination (degrees)",
    `CAPAK_ID` FLOAT COMMENT "ID based on ground-based photometric catalog of Capak 2006",
    `CAPAK_RA` FLOAT COMMENT "Closest matched right ascension to ground-based catalog within 0.6\" (degrees)",
    `CAPAK_DEC` FLOAT COMMENT "Closest matched declination to ground-based catalog within 0.6\" (degrees)",
    `ACS_MAG_AUTO` FLOAT COMMENT "Total magnitude of object- used for catalog selection",
    `ACS_MAGERR_AUTO` FLOAT COMMENT "Magnitude error",
    `ACS_X_IMAGE` FLOAT COMMENT "X-pixel position on ACS-tile",
    `ACS_Y_IMAGE` FLOAT COMMENT "Y-pixel position on ACS-tile",
    `ACS_XPEAK_IMAGE` FLOAT COMMENT "X-pixel position of object peak flux on ACS tile",
    `ACS_YPEAK_IMAGE` FLOAT COMMENT "Y-pixel position of object peak flux on ACS tile",
    `ACS_ALPHAPEAK_` FLOAT COMMENT "RA of object peak flux (degrees)",
    `ACS_DELTAPEAK_` FLOAT COMMENT "Dec of object peak flux (degrees)",
    `ACS_A_IMAGE` FLOAT COMMENT "SExtractor semi-major axis",
    `ACS_B_IMAGE` FLOAT COMMENT "SExtractor semi-minor axis",
    `ACS_THETA_IMAGE` FLOAT COMMENT "SExtractor position angle",
    `ACS_ELONGATION` FLOAT COMMENT "SExtractor a/b [semi-major axis / semi minor axis]",
    `ACS_CLASS_STAR` FLOAT COMMENT "SExtractor stellarity parameter",
    `ACS_IDENT` FLOAT COMMENT "Unique number that identifies object",
    `ACS_SE` FLOAT COMMENT "0 = Detection from 'cold' pass of Sextractor, 1 = Detection from 'hot' pass of Sextractor",
    `ACS_MU_CLASS` FLOAT COMMENT "Type of object (1 = galaxy, 2 = star, 3 = spurious)",
    `ACS_OVERLAP` FLOAT COMMENT "Object overlap flag. All objects here have overlap =1 i.e. no overlap.",
    `ACS_NEARSTAR` FLOAT COMMENT "Object in star mask flag. All the objects in this morphological catalog have \"nearstar = 1\" which means they are not in a star mask.",
    `ACS_MASK` FLOAT COMMENT "Object in automatic mask flag: 2 = star used to make automatic mask",
    `ACS_MASKED` FLOAT COMMENT "Object in manual mask flag: -1 = manual mask (astrophysical obj), 0 = manual mask (artifact or image defect), 1 = not in manual mask",
    `ACS_CLEAN` FLOAT COMMENT "Object useable flag: 0 = do not use this object, 1 = use this object",
    `ACS_UNIQUE` FLOAT COMMENT "Object unique flag. All the objects in this morphological catalog have \"unique = 1\" which means they are all \"unique\" in the original ACS catalog.",
    `GG` FLOAT COMMENT "ZEST Gini coefficient",
    `M20` FLOAT COMMENT "ZEST second order moment of the 20% brightest pixels",
    `CC` FLOAT COMMENT "ZEST concentration",
    `AA` FLOAT COMMENT "ZEST asymmetry",
    `R20` FLOAT COMMENT "ZEST semi-major axis length of ellipse encompassing 20% of total light (pixels)",
    `R50` FLOAT COMMENT "ZEST semi-major axis length of ellipse encompassing 50% of total light (pixels)",
    `R80` FLOAT COMMENT "ZEST semi-major axis length of ellipse encompassing 80% of total light (pixels)",
    `RPET` FLOAT COMMENT "Zest semi-major axis length for Petrosian Radius ellipse (pixels)",
    `FLAGRPET` FLOAT COMMENT "Petrosian radius flag: 0 = single Petrosian radius measured, 2 = multiple radii measured (noisy SB profile), 5 = SB profile was corrected for diffuse light contribution from neighboring object(s)",
    `FLUX_GIM2D` FLOAT COMMENT "GIM2D total flux (counts)",
    `LE_FLUX_GIM2D` FLOAT COMMENT "LE_FLUX_GIM2D (99% conf. lower error on R_GIM2D)",
    `UE_FLUX_GIM2D` FLOAT COMMENT "UE_FLUX_GIM2D (99% conf. lower error on R_GIM2D)",
    `R_GIM2D` FLOAT COMMENT "GIM2D psf-convolved half-light radius of object (arcseconds)",
    `LE_R_GIM2D` FLOAT COMMENT "99% conf. lower error on R_GIM2D",
    `UE_R_GIM2D` FLOAT COMMENT "99% conf. upper error on R_GIM2D",
    `ELL_GIM2D` FLOAT COMMENT "GIM2D ellipticity = 1-b/a of object",
    `LE_ELL_GIM2D` FLOAT COMMENT "99% conf. lower error on ELL_GIM2D",
    `UE_ELL_GIM2D` FLOAT COMMENT "99% conf. upper error on ELL_GIM2D",
    `PA_GIM2D` FLOAT COMMENT "GIM2D position angle of object - cw from +y-axis (degrees)",
    `LE_PA_GIM2D` FLOAT COMMENT "99% conf. lower error on PA_GIM2D",
    `UE_PA_GIM2D` FLOAT COMMENT "99% conf. upper error on PA_GIM2D",
    `DX_GIM2D` FLOAT COMMENT "x-offset of GIM2D-model center from ACS-coordinate center (arcseconds)",
    `LE_DX_GIM2D` FLOAT COMMENT "99% conf. lower error on DX_GIM2D",
    `UE_DX_GIM2D` FLOAT COMMENT "99% conf. upper error on DX_GIM2D",
    `DY_GIM2D` FLOAT COMMENT "y-offset of GIM2D-model center from ACS-coordinate center (arcseconds)",
    `LE_DY_GIM2D` FLOAT COMMENT "99% conf. lower error on DY_GIM2D",
    `UE_DY_GIM2D` FLOAT COMMENT "99% conf. upper error on DY_GIM2D",
    `SERSIC_N_GIM2D` FLOAT COMMENT "GIM2D Sersic index",
    `LE_N_GIM2D` FLOAT COMMENT "99% conf. lower error on SERSIC_N_GIM2D",
    `UE_N_GIM2D` FLOAT COMMENT "99% conf. upper error on SERSIC_N_GIM2D",
    `R_0P5_GIM2D` FLOAT COMMENT "GIM2D half-light radius of object without PSF convolution (arcseconds)",
    `CHI_GIM2D` FLOAT COMMENT "reduced CHI2 of the best GIM2D model fit",
    `ITER_GIM2D` FLOAT COMMENT "no. of iterations required for GIM2D fit to converge",
    `PC_1` FLOAT COMMENT "ZEST first principal component",
    `PC_2` FLOAT COMMENT "ZEST second principal component",
    `PC_3` FLOAT COMMENT "ZEST third principal component",
    `TYPE` FLOAT COMMENT "ZEST Type CLASS (1 = Early type, 2 = Disk, 3 = Irregular Galaxy, 9 = no classification)",
    `BULG` FLOAT COMMENT "ZEST \"Bulgeness\" CLASS - only for Type 2 (disk) galaxies: 0 = bulge dominated galaxy, 1,2 = INTermediate-bulge galaxies, 3 = pure disk galaxy, 9 = no classification",
    `IRRE` FLOAT COMMENT "ZEST Irregularity CLASS - only for Type 1 (elliptical) galaxies: 0 = regular, 1 = irregular, 9 = no value",
    `ELLI` FLOAT COMMENT "ZEST Ellipticity CLASS, 0 = fac-on, 1,2 = increasingly higher elongation, 3 = edge-on, 9 = no classification of elongation",
    `STELLARITY` FLOAT COMMENT "Visual Stellarity flag: 0 if ACS_CLASS_STAR<0.6 (object is ASSUMED to be a galaxy, no visual inspection), 0 if ACS_CLASS_STAR>=0.6 AND object visually identified as a galaxy, 1 if ACS_CLASS_STAR>=0.6 AND visually identified as a star, 2 if ACS_CLASS_STAR>=0.8 (object is assumed to be a star and was not visually inspected), 3 if ACS_CLASS_STAR<0.6 but object is visually identified as a star (e.g. saturated star, etc)",
    `JUNKFLAG` FLOAT COMMENT "0 = good object, 1 = spurious",
    `ACSTile` STRING COMMENT "ACS TILE number from which the object is extracted"
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LOCATION '/user/tallada/data/zest'
;
CREATE TABLE cosmohub.zest_v1_0 (
    `SequentialID` INT COMMENT "Identification number",
    `RA` FLOAT COMMENT "Right ascension (degrees)",
    `DEC` FLOAT COMMENT "Declination (degrees)",
    `CAPAK_ID` FLOAT COMMENT "ID based on ground-based photometric catalog of Capak 2006",
    `CAPAK_RA` FLOAT COMMENT "Closest matched right ascension to ground-based catalog within 0.6\" (degrees)",
    `CAPAK_DEC` FLOAT COMMENT "Closest matched declination to ground-based catalog within 0.6\" (degrees)",
    `ACS_MAG_AUTO` FLOAT COMMENT "Total magnitude of object- used for catalog selection",
    `ACS_MAGERR_AUTO` FLOAT COMMENT "Magnitude error",
    `ACS_X_IMAGE` FLOAT COMMENT "X-pixel position on ACS-tile",
    `ACS_Y_IMAGE` FLOAT COMMENT "Y-pixel position on ACS-tile",
    `ACS_XPEAK_IMAGE` FLOAT COMMENT "X-pixel position of object peak flux on ACS tile",
    `ACS_YPEAK_IMAGE` FLOAT COMMENT "Y-pixel position of object peak flux on ACS tile",
    `ACS_ALPHAPEAK_` FLOAT COMMENT "RA of object peak flux (degrees)",
    `ACS_DELTAPEAK_` FLOAT COMMENT "Dec of object peak flux (degrees)",
    `ACS_A_IMAGE` FLOAT COMMENT "SExtractor semi-major axis",
    `ACS_B_IMAGE` FLOAT COMMENT "SExtractor semi-minor axis",
    `ACS_THETA_IMAGE` FLOAT COMMENT "SExtractor position angle",
    `ACS_ELONGATION` FLOAT COMMENT "SExtractor a/b [semi-major axis / semi minor axis]",
    `ACS_CLASS_STAR` FLOAT COMMENT "SExtractor stellarity parameter",
    `ACS_IDENT` FLOAT COMMENT "Unique number that identifies object",
    `ACS_SE` FLOAT COMMENT "0 = Detection from 'cold' pass of Sextractor, 1 = Detection from 'hot' pass of Sextractor",
    `ACS_MU_CLASS` FLOAT COMMENT "Type of object (1 = galaxy, 2 = star, 3 = spurious)",
    `ACS_OVERLAP` FLOAT COMMENT "Object overlap flag. All objects here have overlap =1 i.e. no overlap.",
    `ACS_NEARSTAR` FLOAT COMMENT "Object in star mask flag. All the objects in this morphological catalog have \"nearstar = 1\" which means they are not in a star mask.",
    `ACS_MASK` FLOAT COMMENT "Object in automatic mask flag: 2 = star used to make automatic mask",
    `ACS_MASKED` FLOAT COMMENT "Object in manual mask flag: -1 = manual mask (astrophysical obj), 0 = manual mask (artifact or image defect), 1 = not in manual mask",
    `ACS_CLEAN` FLOAT COMMENT "Object useable flag: 0 = do not use this object, 1 = use this object",
    `ACS_UNIQUE` FLOAT COMMENT "Object unique flag. All the objects in this morphological catalog have \"unique = 1\" which means they are all \"unique\" in the original ACS catalog.",
    `GG` FLOAT COMMENT "ZEST Gini coefficient",
    `M20` FLOAT COMMENT "ZEST second order moment of the 20% brightest pixels",
    `CC` FLOAT COMMENT "ZEST concentration",
    `AA` FLOAT COMMENT "ZEST asymmetry",
    `R20` FLOAT COMMENT "ZEST semi-major axis length of ellipse encompassing 20% of total light (pixels)",
    `R50` FLOAT COMMENT "ZEST semi-major axis length of ellipse encompassing 50% of total light (pixels)",
    `R80` FLOAT COMMENT "ZEST semi-major axis length of ellipse encompassing 80% of total light (pixels)",
    `RPET` FLOAT COMMENT "Zest semi-major axis length for Petrosian Radius ellipse (pixels)",
    `FLAGRPET` FLOAT COMMENT "Petrosian radius flag: 0 = single Petrosian radius measured, 2 = multiple radii measured (noisy SB profile), 5 = SB profile was corrected for diffuse light contribution from neighboring object(s)",
    `FLUX_GIM2D` FLOAT COMMENT "GIM2D total flux (counts)",
    `LE_FLUX_GIM2D` FLOAT COMMENT "LE_FLUX_GIM2D (99% conf. lower error on R_GIM2D)",
    `UE_FLUX_GIM2D` FLOAT COMMENT "UE_FLUX_GIM2D (99% conf. lower error on R_GIM2D)",
    `R_GIM2D` FLOAT COMMENT "GIM2D psf-convolved half-light radius of object (arcseconds)",
    `LE_R_GIM2D` FLOAT COMMENT "99% conf. lower error on R_GIM2D",
    `UE_R_GIM2D` FLOAT COMMENT "99% conf. upper error on R_GIM2D",
    `ELL_GIM2D` FLOAT COMMENT "GIM2D ellipticity = 1-b/a of object",
    `LE_ELL_GIM2D` FLOAT COMMENT "99% conf. lower error on ELL_GIM2D",
    `UE_ELL_GIM2D` FLOAT COMMENT "99% conf. upper error on ELL_GIM2D",
    `PA_GIM2D` FLOAT COMMENT "GIM2D position angle of object - cw from +y-axis (degrees)",
    `LE_PA_GIM2D` FLOAT COMMENT "99% conf. lower error on PA_GIM2D",
    `UE_PA_GIM2D` FLOAT COMMENT "99% conf. upper error on PA_GIM2D",
    `DX_GIM2D` FLOAT COMMENT "x-offset of GIM2D-model center from ACS-coordinate center (arcseconds)",
    `LE_DX_GIM2D` FLOAT COMMENT "99% conf. lower error on DX_GIM2D",
    `UE_DX_GIM2D` FLOAT COMMENT "99% conf. upper error on DX_GIM2D",
    `DY_GIM2D` FLOAT COMMENT "y-offset of GIM2D-model center from ACS-coordinate center (arcseconds)",
    `LE_DY_GIM2D` FLOAT COMMENT "99% conf. lower error on DY_GIM2D",
    `UE_DY_GIM2D` FLOAT COMMENT "99% conf. upper error on DY_GIM2D",
    `SERSIC_N_GIM2D` FLOAT COMMENT "GIM2D Sersic index",
    `LE_N_GIM2D` FLOAT COMMENT "99% conf. lower error on SERSIC_N_GIM2D",
    `UE_N_GIM2D` FLOAT COMMENT "99% conf. upper error on SERSIC_N_GIM2D",
    `R_0P5_GIM2D` FLOAT COMMENT "GIM2D half-light radius of object without PSF convolution (arcseconds)",
    `CHI_GIM2D` FLOAT COMMENT "reduced CHI2 of the best GIM2D model fit",
    `ITER_GIM2D` FLOAT COMMENT "no. of iterations required for GIM2D fit to converge",
    `PC_1` FLOAT COMMENT "ZEST first principal component",
    `PC_2` FLOAT COMMENT "ZEST second principal component",
    `PC_3` FLOAT COMMENT "ZEST third principal component",
    `TYPE` FLOAT COMMENT "ZEST Type CLASS (1 = Early type, 2 = Disk, 3 = Irregular Galaxy, 9 = no classification)",
    `BULG` FLOAT COMMENT "ZEST \"Bulgeness\" CLASS - only for Type 2 (disk) galaxies: 0 = bulge dominated galaxy, 1,2 = INTermediate-bulge galaxies, 3 = pure disk galaxy, 9 = no classification",
    `IRRE` FLOAT COMMENT "ZEST Irregularity CLASS - only for Type 1 (elliptical) galaxies: 0 = regular, 1 = irregular, 9 = no value",
    `ELLI` FLOAT COMMENT "ZEST Ellipticity CLASS, 0 = fac-on, 1,2 = increasingly higher elongation, 3 = edge-on, 9 = no classification of elongation",
    `STELLARITY` FLOAT COMMENT "Visual Stellarity flag: 0 if ACS_CLASS_STAR<0.6 (object is ASSUMED to be a galaxy, no visual inspection), 0 if ACS_CLASS_STAR>=0.6 AND object visually identified as a galaxy, 1 if ACS_CLASS_STAR>=0.6 AND visually identified as a star, 2 if ACS_CLASS_STAR>=0.8 (object is assumed to be a star and was not visually inspected), 3 if ACS_CLASS_STAR<0.6 but object is visually identified as a star (e.g. saturated star, etc)",
    `JUNKFLAG` FLOAT COMMENT "0 = good object, 1 = spurious",
    `ACSTile` STRING COMMENT "ACS TILE number from which the object is extracted"
)
CLUSTERED BY (`SequentialID`)
INTO 4 BUCKETS
STORED AS ORC
;
INSERT OVERWRITE TABLE cosmohub.zest_v1_0
SELECT * FROM tallada.zest
;


UPDATE

I update the catalog to include the cosmos_2015_dr_2_1 index to be able to make joins directly in COSMOHUB.

To do the matching I use a python notebook which can be found here:

> /nfs/pic.es/user/j/jcarrete/python_notebooks/Matching_ZEST_and_COSMOS2015.ipynb

I create a table with the "link" (`sequentialid`, `cosmos2015_id):

CREATE EXTERNAL TABLE jcarrete.zest_cosmos2015_spatial_matching_csv (
sequentialid int,
ra float,
dec float,
cosmos2015_id bigint,
cosmos2015_distance float
)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
WITH SERDEPROPERTIES ( 
  'field.delim'=',', 
  'serialization.format'=',') 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://namenode01/user/jcarrete/data/zest_cosmos2015_spatial_matching/'
;

This is the ZEST table updated, which include the COSMOS2015 id:

CREATE TABLE cosmohub.`zest_v1_0_cosmos2015_join`(
  `sequentialid` int COMMENT 'Identification number',
  `cosmos2015_id` bigint COMMENT 'id from COSMOS2015 DR2.1 catalog to be able to JOIN both catalogs',
  `cosmos2015_distance` float COMMENT 'estimated distance from the nearest galaxy when using sklearn.neighbors.KDTree (ra, dec)',
  `ra` float COMMENT 'Right ascension (degrees)', 
  `dec` float COMMENT 'Declination (degrees)', 
  `capak_id` float COMMENT 'ID based on ground-based photometric catalog of Capak 2006', 
  `capak_ra` float COMMENT 'Closest matched right ascension to ground-based catalog within 0.6" (degrees)', 
  `capak_dec` float COMMENT 'Closest matched declination to ground-based catalog within 0.6" (degrees)', 
  `acs_mag_auto` float COMMENT 'Total magnitude of object- used for catalog selection', 
  `acs_magerr_auto` float COMMENT 'Magnitude error', 
  `acs_x_image` float COMMENT 'X-pixel position on ACS-tile', 
  `acs_y_image` float COMMENT 'Y-pixel position on ACS-tile', 
  `acs_xpeak_image` float COMMENT 'X-pixel position of object peak flux on ACS tile', 
  `acs_ypeak_image` float COMMENT 'Y-pixel position of object peak flux on ACS tile', 
  `acs_alphapeak_` float COMMENT 'RA of object peak flux (degrees)', 
  `acs_deltapeak_` float COMMENT 'Dec of object peak flux (degrees)', 
  `acs_a_image` float COMMENT 'SExtractor semi-major axis', 
  `acs_b_image` float COMMENT 'SExtractor semi-minor axis', 
  `acs_theta_image` float COMMENT 'SExtractor position angle', 
  `acs_elongation` float COMMENT 'SExtractor a/b [semi-major axis / semi minor axis]', 
  `acs_class_star` float COMMENT 'SExtractor stellarity parameter', 
  `acs_ident` float COMMENT 'Unique number that identifies object', 
  `acs_se` float COMMENT '0 = Detection from \'cold\' pass of Sextractor, 1 = Detection from \'hot\' pass of Sextractor', 
  `acs_mu_class` float COMMENT 'Type of object (1 = galaxy, 2 = star, 3 = spurious)', 
  `acs_overlap` float COMMENT 'Object overlap flag. All objects here have overlap =1 i.e. no overlap.', 
  `acs_nearstar` float COMMENT 'Object in star mask flag. All the objects in this morphological catalog have "nearstar = 1" which means they are not in a star mask.', 
  `acs_mask` float COMMENT 'Object in automatic mask flag: 2 = star used to make automatic mask', 
  `acs_masked` float COMMENT 'Object in manual mask flag: -1 = manual mask (astrophysical obj), 0 = manual mask (artifact or image defect), 1 = not in manual mask', 
  `acs_clean` float COMMENT 'Object useable flag: 0 = do not use this object, 1 = use this object', 
  `acs_unique` float COMMENT 'Object unique flag. All the objects in this morphological catalog have "unique = 1" which means they are all "unique" in the original ACS catalog.', 
  `gg` float COMMENT 'ZEST Gini coefficient', 
  `m20` float COMMENT 'ZEST second order moment of the 20% brightest pixels', 
  `cc` float COMMENT 'ZEST concentration', 
  `aa` float COMMENT 'ZEST asymmetry', 
  `r20` float COMMENT 'ZEST semi-major axis length of ellipse encompassing 20% of total light (pixels)', 
  `r50` float COMMENT 'ZEST semi-major axis length of ellipse encompassing 50% of total light (pixels)', 
  `r80` float COMMENT 'ZEST semi-major axis length of ellipse encompassing 80% of total light (pixels)', 
  `rpet` float COMMENT 'Zest semi-major axis length for Petrosian Radius ellipse (pixels)', 
  `flagrpet` float COMMENT 'Petrosian radius flag: 0 = single Petrosian radius measured, 2 = multiple radii measured (noisy SB profile), 5 = SB profile was corrected for diffuse light contribution from neighboring object(s)', 
  `flux_gim2d` float COMMENT 'GIM2D total flux (counts)', 
  `le_flux_gim2d` float COMMENT 'LE_FLUX_GIM2D (99% conf. lower error on R_GIM2D)', 
  `ue_flux_gim2d` float COMMENT 'UE_FLUX_GIM2D (99% conf. lower error on R_GIM2D)', 
  `r_gim2d` float COMMENT 'GIM2D psf-convolved half-light radius of object (arcseconds)', 
  `le_r_gim2d` float COMMENT '99% conf. lower error on R_GIM2D', 
  `ue_r_gim2d` float COMMENT '99% conf. upper error on R_GIM2D', 
  `ell_gim2d` float COMMENT 'GIM2D ellipticity = 1-b/a of object', 
  `le_ell_gim2d` float COMMENT '99% conf. lower error on ELL_GIM2D', 
  `ue_ell_gim2d` float COMMENT '99% conf. upper error on ELL_GIM2D', 
  `pa_gim2d` float COMMENT 'GIM2D position angle of object - cw from +y-axis (degrees)', 
  `le_pa_gim2d` float COMMENT '99% conf. lower error on PA_GIM2D', 
  `ue_pa_gim2d` float COMMENT '99% conf. upper error on PA_GIM2D', 
  `dx_gim2d` float COMMENT 'x-offset of GIM2D-model center from ACS-coordinate center (arcseconds)', 
  `le_dx_gim2d` float COMMENT '99% conf. lower error on DX_GIM2D', 
  `ue_dx_gim2d` float COMMENT '99% conf. upper error on DX_GIM2D', 
  `dy_gim2d` float COMMENT 'y-offset of GIM2D-model center from ACS-coordinate center (arcseconds)', 
  `le_dy_gim2d` float COMMENT '99% conf. lower error on DY_GIM2D', 
  `ue_dy_gim2d` float COMMENT '99% conf. upper error on DY_GIM2D', 
  `sersic_n_gim2d` float COMMENT 'GIM2D Sersic index', 
  `le_n_gim2d` float COMMENT '99% conf. lower error on SERSIC_N_GIM2D', 
  `ue_n_gim2d` float COMMENT '99% conf. upper error on SERSIC_N_GIM2D', 
  `r_0p5_gim2d` float COMMENT 'GIM2D half-light radius of object without PSF convolution (arcseconds)', 
  `chi_gim2d` float COMMENT 'reduced CHI2 of the best GIM2D model fit', 
  `iter_gim2d` float COMMENT 'no. of iterations required for GIM2D fit to converge', 
  `pc_1` float COMMENT 'ZEST first principal component', 
  `pc_2` float COMMENT 'ZEST second principal component', 
  `pc_3` float COMMENT 'ZEST third principal component', 
  `type` float COMMENT 'ZEST Type CLASS (1 = Early type, 2 = Disk, 3 = Irregular Galaxy, 9 = no classification)', 
  `bulg` float COMMENT 'ZEST "Bulgeness" CLASS - only for Type 2 (disk) galaxies: 0 = bulge dominated galaxy, 1,2 = INTermediate-bulge galaxies, 3 = pure disk galaxy, 9 = no classification', 
  `irre` float COMMENT 'ZEST Irregularity CLASS - only for Type 1 (elliptical) galaxies: 0 = regular, 1 = irregular, 9 = no value', 
  `elli` float COMMENT 'ZEST Ellipticity CLASS, 0 = fac-on, 1,2 = increasingly higher elongation, 3 = edge-on, 9 = no classification of elongation', 
  `stellarity` float COMMENT 'Visual Stellarity flag: 0 if ACS_CLASS_STAR<0.6 (object is ASSUMED to be a galaxy, no visual inspection), 0 if ACS_CLASS_STAR>=0.6 AND object visually identified as a galaxy, 1 if ACS_CLASS_STAR>=0.6 AND visually identified as a star, 2 if ACS_CLASS_STAR>=0.8 (object is assumed to be a star and was not visually inspected), 3 if ACS_CLASS_STAR<0.6 but object is visually identified as a star (e.g. saturated star, etc)', 
  `junkflag` float COMMENT '0 = good object, 1 = spurious', 
  `acstile` string COMMENT 'ACS TILE number from which the object is extracted')
CLUSTERED BY ( 
  sequentialid) 
INTO 4 BUCKETS
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
;

I materialize the JOIN:

INSERT OVERWRITE TABLE cosmohub.`zest_v1_0_cosmos2015_join`
SELECT zest.`sequentialid`, link.`cosmos2015_id`, link.`cosmos2015_distance`, zest.`ra`, zest.`dec`, zest.`capak_id`, zest.`capak_ra`, zest.`capak_dec`, zest.`acs_mag_auto`, zest.`acs_magerr_auto`, zest.`acs_x_image`, zest.`acs_y_image`, zest.`acs_xpeak_image`, zest.`acs_ypeak_image`, zest.`acs_alphapeak_`, zest.`acs_deltapeak_`, zest.`acs_a_image`, zest.`acs_b_image`, zest.`acs_theta_image`, zest.`acs_elongation`, zest.`acs_class_star`, zest.`acs_ident`, zest.`acs_se`, zest.`acs_mu_class`, zest.`acs_overlap`, zest.`acs_nearstar`, zest.`acs_mask`, zest.`acs_masked`, zest.`acs_clean`, zest.`acs_unique`, zest.`gg`, zest.`m20`, zest.`cc`, zest.`aa`, zest.`r20`, zest.`r50`, zest.`r80`, zest.`rpet`, zest.`flagrpet`, zest.`flux_gim2d`, zest.`le_flux_gim2d`, zest.`ue_flux_gim2d`, zest.`r_gim2d`, zest.`le_r_gim2d`, zest.`ue_r_gim2d`, zest.`ell_gim2d`, zest.`le_ell_gim2d`, zest.`ue_ell_gim2d`, zest.`pa_gim2d`, zest.`le_pa_gim2d`, zest.`ue_pa_gim2d`, zest.`dx_gim2d`, zest.`le_dx_gim2d`, zest.`ue_dx_gim2d`, zest.`dy_gim2d`, zest.`le_dy_gim2d`, zest.`ue_dy_gim2d`, zest.`sersic_n_gim2d`, zest.`le_n_gim2d`, zest.`ue_n_gim2d`, zest.`r_0p5_gim2d`, zest.`chi_gim2d`, zest.`iter_gim2d`, zest.`pc_1`, zest.`pc_2`, zest.`pc_3`, zest.`type`, zest.`bulg`, zest.`irre`, zest.`elli`, zest.`stellarity`, zest.`junkflag`, zest.`acstile`
FROM cosmohub.zest_v1_0 as zest
JOIN jcarrete.zest_cosmos2015_spatial_matching_csv as link
ON zest.`sequentialid` = link.`sequentialid`
;


And here is a test:

SELECT zest.sequentialid, zest.cosmos2015_id, cosmos.id, zest.ra, cosmos.alpha_j2000, zest.dec, cosmos.delta_j2000, zest.cosmos2015_distance 
FROM cosmohub.`zest_v1_0_cosmos2015_join` as zest 
JOIN cosmohub.cosmos_2015_dr_2_1 as cosmos
ON zest.cosmos2015_id = cosmos.id
LIMIT 5;