Difference between revisions of "PAUS+COSMOS photoz v0.2"
Jump to navigation
Jump to search
(Created page with "Ingesting Alex catalog used in his paper: https://pausurvey.org/wiki/index.php/Main_Page/Papers/BayesEvZ There is an email with subject: 'Catalogo photo-z PAUS+COSMOS para...") |
|||
Line 109: | Line 109: | ||
[jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.2/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2.parquet | [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.2/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2.parquet | ||
[jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.2/PAU_COSMOS_photoz_catalog_PIC_PZ_v0.2_column_description.csv /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PIC_PZ_v0.2_column_description.csv | [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.2/PAU_COSMOS_photoz_catalog_PIC_PZ_v0.2_column_description.csv /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PIC_PZ_v0.2_column_description.csv | ||
+ | |||
+ | |||
+ | |||
+ | == Version 0.3 == | ||
+ | |||
+ | Data are in: | ||
+ | |||
+ | /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3 | ||
+ | |||
+ | Copy to hdfs path: | ||
+ | |||
+ | hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3/PAU_COSMOS_photoz_catalog_PIC_v0.3.parquet /user/jcarrete/data/paudm/paus_cosmos_photoz_v0.3/ | ||
+ | |||
+ | Create external table: | ||
+ | |||
+ | CREATE EXTERNAL TABLE jcarrete.pau_cosmos_photoz_v0_3_p ( | ||
+ | `ref_id` BIGINT, | ||
+ | `I_auto` DOUBLE, | ||
+ | `photoz` DOUBLE, | ||
+ | `ra` DOUBLE, | ||
+ | `dec` DOUBLE, | ||
+ | `nbands` BIGINT, | ||
+ | `id_laigle` BIGINT, | ||
+ | `zspec_mean` DOUBLE, | ||
+ | `zspec_std` DOUBLE, | ||
+ | `best_model` DOUBLE, | ||
+ | `best_extlaw` BIGINT, | ||
+ | `best_continuum` DOUBLE, | ||
+ | `best_ebv` DOUBLE, | ||
+ | `best_type` DOUBLE, | ||
+ | `MUV` DOUBLE, | ||
+ | `MU` DOUBLE, | ||
+ | `MR` DOUBLE, | ||
+ | `MI` DOUBLE, | ||
+ | `flux_OII` DOUBLE, | ||
+ | `flux_Hbeta` DOUBLE, | ||
+ | `flux_OIII` DOUBLE, | ||
+ | `flux_Halpha` DOUBLE, | ||
+ | `flux_err_OII` DOUBLE, | ||
+ | `flux_err_Hbeta` DOUBLE, | ||
+ | `flux_err_OIII` DOUBLE, | ||
+ | `flux_err_Halpha` DOUBLE, | ||
+ | `line_coverage_OII` DOUBLE, | ||
+ | `line_coverage_Hbeta` DOUBLE, | ||
+ | `line_coverage_OIII` DOUBLE, | ||
+ | `line_coverage_Halpha` DOUBLE | ||
+ | ) | ||
+ | STORED AS PARQUET | ||
+ | LOCATION '/user/jcarrete/data/paudm/paus_cosmos_photoz_v0.3' | ||
+ | ; | ||
+ | |||
+ | CREATE TABLE cosmohub.pau_cosmos_photoz_v0_3_c( | ||
+ | `ref_id` bigint COMMENT 'PAUdm reference ID', | ||
+ | `i_auto` double COMMENT 'Auto i-band magnitude from Ilbert+09 cosmos photo-z catalog', | ||
+ | `photoz` double COMMENT 'Photo-z, defined as the mode of the p(z) of each object obtained from the templates', | ||
+ | `ra` double COMMENT 'Right ascention', | ||
+ | `dec` double COMMENT 'Declination', | ||
+ | `nbands` bigint COMMENT 'Number of bands used in the photo-z code', | ||
+ | `id_laigle` bigint COMMENT 'COSMOS2015 (Laigle+15) reference ID', | ||
+ | `zspec_mean` double COMMENT 'Average spectroscopic redshift from several public redshift surveys (when available)', | ||
+ | `zspec_std` double COMMENT 'Standard deviation spectroscopic redshift from several public redshift surveys (when available)', | ||
+ | `best_model` double COMMENT 'Best model in the photo-z code', | ||
+ | `best_extlaw` bigint COMMENT 'Best extinction law in the photo-z code (0:No extinction\; 1:Prevot\; 2:Calzetti\; 3:Calzetti+Bump1\; 4:Calzetti+Bump2', | ||
+ | `best_continuum` double COMMENT 'Best continuum template group (see Table 1 in Alarcon et.al)', | ||
+ | `best_ebv` double COMMENT 'Best extinction E(B-V) value (from 0 to 0.5, in steps of 0.05)', | ||
+ | `best_type` double COMMENT 'Best galaxy type (0: red, 1:green, 2:blue)', | ||
+ | `muv` double COMMENT 'Absolute magnitude Galex NUV band. Corrected for internal galaxy extinction', | ||
+ | `mu` double COMMENT 'Absolute magnitude CFHT U band. Corrected for internal galaxy extinction', | ||
+ | `mr` double COMMENT 'Absolute magnitude Subaru R band. Corrected for internal galaxy extinction', | ||
+ | `mi` double COMMENT 'Absolute magnitude Subaru I band. Corrected for internal galaxy extinction', | ||
+ | `flux_oii` double COMMENT 'Flux for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', | ||
+ | `flux_hbeta` double COMMENT 'Flux for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', | ||
+ | `flux_oiii` double COMMENT 'Flux for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', | ||
+ | `flux_halpha` double COMMENT 'Flux for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', | ||
+ | `flux_err_oii` double COMMENT 'Flux error for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', | ||
+ | `flux_err_hbeta` double COMMENT 'Flux error for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', | ||
+ | `flux_err_oiii` double COMMENT 'Flux error for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', | ||
+ | `flux_err_halpha` double COMMENT 'Flux error for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', | ||
+ | `line_coverage_oii` double COMMENT 'Maximum coverage of the OII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', | ||
+ | `line_coverage_hbeta` double COMMENT 'Maximum coverage of the Hbeta flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', | ||
+ | `line_coverage_oiii` double COMMENT 'Maximum coverage of the OIII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', | ||
+ | `line_coverage_halpha` double COMMENT 'Maximum coverage of the Halpha flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged') | ||
+ | CLUSTERED BY ( | ||
+ | ref_id) | ||
+ | SORTED BY ( | ||
+ | ref_id ASC) | ||
+ | INTO 4 BUCKETS | ||
+ | ROW FORMAT SERDE | ||
+ | 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' | ||
+ | STORED AS INPUTFORMAT | ||
+ | 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' | ||
+ | OUTPUTFORMAT | ||
+ | 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' | ||
+ | ; | ||
+ | |||
+ | INSERT OVERWRITE TABLE cosmohub.pau_cosmos_photoz_v0_3_c SELECT * FROM jcarrete.pau_cosmos_photoz_v0_3_p; | ||
+ | |||
+ | ANALYZE TABLE cosmohub.pau_cosmos_photoz_v0_3_c COMPUTE STATISTICS FOR COLUMNS; | ||
+ | |||
+ | [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet |
Latest revision as of 08:17, 17 July 2020
Ingesting Alex catalog used in his paper:
https://pausurvey.org/wiki/index.php/Main_Page/Papers/BayesEvZ
There is an email with subject: 'Catalogo photo-z PAUS+COSMOS para cosmohub' where the iteration with Alex is described.
The external table is created by Pau using the "standard" script for parquet files:
CREATE EXTERNAL TABLE `tallada.pau_cosmos_photoz_v0_2`( `ref_id` bigint, `i_auto` double, `photoz` double, `ra` double, `dec` double, `nbands` bigint, `id_laigle` bigint, `zspec_mean` double, `zspec_std` double, `best_model` double, `best_extlaw` bigint, `best_continuum` double, `best_ebv` double, `best_type` double, `muv` double, `mu` double, `mr` double, `mi` double, `flux_oii` double, `flux_hbeta` double, `flux_oiii` double, `flux_halpha` double, `flux_err_oii` double, `flux_err_hbeta` double, `flux_err_oiii` double, `flux_err_halpha` double, `line_coverage_oii` double, `line_coverage_hbeta` double, `line_coverage_oiii` double, `line_coverage_halpha` double) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' LOCATION 'hdfs://AntNest/user/tallada/data/pau/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2' ;
I create the internal table in CosmoHub:
CREATE TABLE `pau_cosmos_photoz_v0_2_c`( `ref_id` bigint COMMENT 'PAUdm reference ID', `i_auto` double COMMENT 'Auto i-band magnitude from Ilbert+09 cosmos photo-z catalog', `photoz` double COMMENT 'Photo-z, defined as the mode of the p(z) of each object obtained from the templates', `ra` double COMMENT 'Right ascention', `dec` double COMMENT 'Declination', `nbands` bigint COMMENT 'Number of bands used in the photo-z code', `id_laigle` bigint COMMENT 'COSMOS2015 (Laigle+15) reference ID', `zspec_mean` double COMMENT 'Average spectroscopic redshift from several public redshift surveys (when available)', `zspec_std` double COMMENT 'Standard deviation spectroscopic redshift from several public redshift surveys (when available)', `best_model` double COMMENT 'Best model in the photo-z code', `best_extlaw` bigint COMMENT 'Best extinction law in the photo-z code (0:No extinction; 1:Prevot; 2:Calzetti; 3:Calzetti+Bump1; 4:Calzetti+Bump2', `best_continuum` double COMMENT 'Best continuum template group (see Table 1 in Alarcon et.al)', `best_ebv` double COMMENT 'Best extinction E(B-V) value (from 0 to 0.5, in steps of 0.05)', `best_type` double COMMENT 'Best galaxy type (0: red, 1:green, 2:blue)', `muv` double COMMENT 'Absolute magnitude Galex NUV band. Corrected for internal galaxy extinction', `mu` double COMMENT 'Absolute magnitude CFHT U band. Corrected for internal galaxy extinction', `mr` double COMMENT 'Absolute magnitude Subaru R band. Corrected for internal galaxy extinction', `mi` double COMMENT 'Absolute magnitude Subaru I band. Corrected for internal galaxy extinction', `flux_oii` double COMMENT 'Flux for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_hbeta` double COMMENT 'Flux for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_oiii` double COMMENT 'Flux for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_halpha` double COMMENT 'Flux for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_err_oii` double COMMENT 'Flux error for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_err_hbeta` double COMMENT 'Flux error for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_err_oiii` double COMMENT 'Flux error for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_err_halpha` double COMMENT 'Flux error for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `line_coverage_oii` double COMMENT 'Maximum coverage of the OII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', `line_coverage_hbeta` double COMMENT 'Maximum coverage of the Hbeta flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', `line_coverage_oiii` double COMMENT 'Maximum coverage of the OIII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', `line_coverage_halpha` double COMMENT 'Maximum coverage of the Halpha flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged' ) CLUSTERED BY ( ref_id ) SORTED BY ( ref_id ASC ) INTO 4 BUCKETS ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' ;
INSERT OVERWRITE TABLE cosmohub.pau_cosmos_photoz_v0_2_c SELECT * FROM tallada.pau_cosmos_photoz_v0_2;
ANALYZE TABLE cosmohub.pau_cosmos_photoz_v0_2_c COMPUTE STATISTICS FOR COLUMNS;
Copy the P(z) file into the value_added_data path:
[jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.2/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2.parquet [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.2/PAU_COSMOS_photoz_catalog_PIC_PZ_v0.2_column_description.csv /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PIC_PZ_v0.2_column_description.csv
Version 0.3
Data are in:
/cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3
Copy to hdfs path:
hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3/PAU_COSMOS_photoz_catalog_PIC_v0.3.parquet /user/jcarrete/data/paudm/paus_cosmos_photoz_v0.3/
Create external table:
CREATE EXTERNAL TABLE jcarrete.pau_cosmos_photoz_v0_3_p ( `ref_id` BIGINT, `I_auto` DOUBLE, `photoz` DOUBLE, `ra` DOUBLE, `dec` DOUBLE, `nbands` BIGINT, `id_laigle` BIGINT, `zspec_mean` DOUBLE, `zspec_std` DOUBLE, `best_model` DOUBLE, `best_extlaw` BIGINT, `best_continuum` DOUBLE, `best_ebv` DOUBLE, `best_type` DOUBLE, `MUV` DOUBLE, `MU` DOUBLE, `MR` DOUBLE, `MI` DOUBLE, `flux_OII` DOUBLE, `flux_Hbeta` DOUBLE, `flux_OIII` DOUBLE, `flux_Halpha` DOUBLE, `flux_err_OII` DOUBLE, `flux_err_Hbeta` DOUBLE, `flux_err_OIII` DOUBLE, `flux_err_Halpha` DOUBLE, `line_coverage_OII` DOUBLE, `line_coverage_Hbeta` DOUBLE, `line_coverage_OIII` DOUBLE, `line_coverage_Halpha` DOUBLE ) STORED AS PARQUET LOCATION '/user/jcarrete/data/paudm/paus_cosmos_photoz_v0.3' ;
CREATE TABLE cosmohub.pau_cosmos_photoz_v0_3_c( `ref_id` bigint COMMENT 'PAUdm reference ID', `i_auto` double COMMENT 'Auto i-band magnitude from Ilbert+09 cosmos photo-z catalog', `photoz` double COMMENT 'Photo-z, defined as the mode of the p(z) of each object obtained from the templates', `ra` double COMMENT 'Right ascention', `dec` double COMMENT 'Declination', `nbands` bigint COMMENT 'Number of bands used in the photo-z code', `id_laigle` bigint COMMENT 'COSMOS2015 (Laigle+15) reference ID', `zspec_mean` double COMMENT 'Average spectroscopic redshift from several public redshift surveys (when available)', `zspec_std` double COMMENT 'Standard deviation spectroscopic redshift from several public redshift surveys (when available)', `best_model` double COMMENT 'Best model in the photo-z code', `best_extlaw` bigint COMMENT 'Best extinction law in the photo-z code (0:No extinction\; 1:Prevot\; 2:Calzetti\; 3:Calzetti+Bump1\; 4:Calzetti+Bump2', `best_continuum` double COMMENT 'Best continuum template group (see Table 1 in Alarcon et.al)', `best_ebv` double COMMENT 'Best extinction E(B-V) value (from 0 to 0.5, in steps of 0.05)', `best_type` double COMMENT 'Best galaxy type (0: red, 1:green, 2:blue)', `muv` double COMMENT 'Absolute magnitude Galex NUV band. Corrected for internal galaxy extinction', `mu` double COMMENT 'Absolute magnitude CFHT U band. Corrected for internal galaxy extinction', `mr` double COMMENT 'Absolute magnitude Subaru R band. Corrected for internal galaxy extinction', `mi` double COMMENT 'Absolute magnitude Subaru I band. Corrected for internal galaxy extinction', `flux_oii` double COMMENT 'Flux for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_hbeta` double COMMENT 'Flux for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_oiii` double COMMENT 'Flux for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_halpha` double COMMENT 'Flux for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_err_oii` double COMMENT 'Flux error for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_err_hbeta` double COMMENT 'Flux error for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_err_oiii` double COMMENT 'Flux error for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `flux_err_halpha` double COMMENT 'Flux error for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', `line_coverage_oii` double COMMENT 'Maximum coverage of the OII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', `line_coverage_hbeta` double COMMENT 'Maximum coverage of the Hbeta flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', `line_coverage_oiii` double COMMENT 'Maximum coverage of the OIII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', `line_coverage_halpha` double COMMENT 'Maximum coverage of the Halpha flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged') CLUSTERED BY ( ref_id) SORTED BY ( ref_id ASC) INTO 4 BUCKETS ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' ;
INSERT OVERWRITE TABLE cosmohub.pau_cosmos_photoz_v0_3_c SELECT * FROM jcarrete.pau_cosmos_photoz_v0_3_p;
ANALYZE TABLE cosmohub.pau_cosmos_photoz_v0_3_c COMPUTE STATISTICS FOR COLUMNS;
[jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet