Difference between revisions of "PAUS+COSMOS photoz"

From Public PIC Wiki
Jump to navigation Jump to search
(Created page with "== Version 0.2 == Ingesting Alex catalog used in his paper: https://pausurvey.org/wiki/index.php/Main_Page/Papers/BayesEvZ There is an email with subject: 'Catalogo photo-...")
 
Line 211: Line 211:
  
 
   [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet
 
   [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet
 +
 +
 +
 +
== Version 0.4 ==
 +
 +
Data are in:
 +
 +
    /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.4
 +
 +
Copy to hdfs path:
 +
 +
    hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.4/PAU_COSMOS_photoz_catalog_PIC_v0.4.parquet /user/jcarrete/data/paudm/paus_cosmos_photoz_v0.4/
 +
 +
 +
    [jcarrete@data ~]$ /software/astro/scripts/create_table_from_parquet.sh -p /hdfs/user/jcarrete/data/paudm/paus_cosmos_photoz_v0.4/PAU_COSMOS_photoz_catalog_PIC_v0.4.parquet -t jcarrete.pau_cosmos_photoz_v0_4_p -e
 +
 +
 +
    CREATE EXTERNAL TABLE jcarrete.pau_cosmos_photoz_v0_4_p (
 +
    `ref_id` BIGINT,
 +
    `I_auto` DOUBLE,
 +
    `photoz` DOUBLE,
 +
    `ra` DOUBLE,
 +
    `dec` DOUBLE,
 +
    `nbands` BIGINT,
 +
    `id_laigle` BIGINT,
 +
    `zspec_mean` DOUBLE,
 +
    `zspec_std` DOUBLE,
 +
    `best_model` DOUBLE,
 +
    `best_extlaw` BIGINT,
 +
    `best_continuum` DOUBLE,
 +
    `best_ebv` DOUBLE,
 +
    `best_type` DOUBLE,
 +
    `MUV` DOUBLE,
 +
    `MU` DOUBLE,
 +
    `MR` DOUBLE,
 +
    `MI` DOUBLE,
 +
    `flux_OII` DOUBLE,
 +
    `flux_Hbeta` DOUBLE,
 +
    `flux_OIII` DOUBLE,
 +
    `flux_Halpha` DOUBLE,
 +
    `flux_err_OII` DOUBLE,
 +
    `flux_err_Hbeta` DOUBLE,
 +
    `flux_err_OIII` DOUBLE,
 +
    `flux_err_Halpha` DOUBLE,
 +
    `line_coverage_OII` DOUBLE,
 +
    `line_coverage_Hbeta` DOUBLE,
 +
    `line_coverage_OIII` DOUBLE,
 +
    `line_coverage_Halpha` DOUBLE
 +
    )
 +
    STORED AS PARQUET
 +
    LOCATION '/user/jcarrete/data/paudm/paus_cosmos_photoz_v0.4'
 +
    ;
 +
 +
 +
    CREATE TABLE cosmohub.pau_cosmos_photoz_v0_4_c(
 +
  `ref_id` bigint COMMENT 'PAUdm reference ID',
 +
  `i_auto` double COMMENT 'Auto i-band magnitude from Ilbert+09 cosmos photo-z catalog',
 +
  `photoz` double COMMENT 'Photo-z, defined as the mode of the p(z) of each object obtained from the templates',
 +
  `ra` double COMMENT 'Right ascention',
 +
  `dec` double COMMENT 'Declination',
 +
  `nbands` bigint COMMENT 'Number of bands used in the photo-z code',
 +
  `id_laigle` bigint COMMENT 'COSMOS2015 (Laigle+15) reference ID',
 +
  `zspec_mean` double COMMENT 'Average spectroscopic redshift from several public redshift surveys (when available)',
 +
  `zspec_std` double COMMENT 'Standard deviation spectroscopic redshift from several public redshift surveys (when available)',
 +
  `best_model` double COMMENT 'Best model in the photo-z code',
 +
  `best_extlaw` bigint COMMENT 'Best extinction law in the photo-z code (0:No extinction\; 1:Prevot\; 2:Calzetti\; 3:Calzetti+Bump1\; 4:Calzetti+Bump2',
 +
  `best_continuum` double COMMENT 'Best continuum template group (see Table 1 in Alarcon et.al)',
 +
  `best_ebv` double COMMENT 'Best extinction E(B-V) value (from 0 to 0.5, in steps of 0.05)',
 +
  `best_type` double COMMENT 'Best galaxy type (0: red, 1:green, 2:blue)',
 +
  `muv` double COMMENT 'Absolute magnitude Galex NUV band. Corrected for internal galaxy extinction',
 +
  `mu` double COMMENT 'Absolute magnitude CFHT U band. Corrected for internal galaxy extinction',
 +
  `mr` double COMMENT 'Absolute magnitude Subaru R band. Corrected for internal galaxy extinction',
 +
  `mi` double COMMENT 'Absolute magnitude Subaru I band. Corrected for internal galaxy extinction',
 +
  `flux_oii` double COMMENT 'Flux for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',
 +
  `flux_hbeta` double COMMENT 'Flux for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',
 +
  `flux_oiii` double COMMENT 'Flux for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',
 +
  `flux_halpha` double COMMENT 'Flux for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',
 +
  `flux_err_oii` double COMMENT 'Flux error for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',
 +
  `flux_err_hbeta` double COMMENT 'Flux error for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',
 +
  `flux_err_oiii` double COMMENT 'Flux error for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',
 +
  `flux_err_halpha` double COMMENT 'Flux error for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',
 +
  `line_coverage_oii` double COMMENT 'Maximum coverage of the OII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged',
 +
  `line_coverage_hbeta` double COMMENT 'Maximum coverage of the Hbeta flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged',
 +
  `line_coverage_oiii` double COMMENT 'Maximum coverage of the OIII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged',
 +
  `line_coverage_halpha` double COMMENT 'Maximum coverage of the Halpha flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged')
 +
    CLUSTERED BY (
 +
  ref_id)
 +
    SORTED BY (
 +
  ref_id ASC)
 +
    INTO 4 BUCKETS
 +
    ROW FORMAT SERDE
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
 +
    STORED AS INPUTFORMAT
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
 +
    OUTPUTFORMAT
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
 +
    ;
 +
 +
    INSERT OVERWRITE TABLE cosmohub.pau_cosmos_photoz_v0_4_c SELECT * FROM jcarrete.pau_cosmos_photoz_v0_4_p;
 +
 +
    ANALYZE TABLE cosmohub.pau_cosmos_photoz_v0_4_c COMPUTE STATISTICS FOR COLUMNS;
 +
 +
  [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.4/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.4.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.4.parquet

Revision as of 12:03, 21 July 2020

Version 0.2

Ingesting Alex catalog used in his paper:

https://pausurvey.org/wiki/index.php/Main_Page/Papers/BayesEvZ

There is an email with subject: 'Catalogo photo-z PAUS+COSMOS para cosmohub' where the iteration with Alex is described.

The external table is created by Pau using the "standard" script for parquet files:

   CREATE EXTERNAL TABLE `tallada.pau_cosmos_photoz_v0_2`(
 `ref_id` bigint, 
 `i_auto` double, 
 `photoz` double, 
 `ra` double, 
 `dec` double, 
 `nbands` bigint, 
 `id_laigle` bigint, 
 `zspec_mean` double, 
 `zspec_std` double, 
 `best_model` double, 
 `best_extlaw` bigint, 
 `best_continuum` double, 
 `best_ebv` double, 
 `best_type` double, 
 `muv` double, 
 `mu` double, 
 `mr` double, 
 `mi` double, 
 `flux_oii` double, 
 `flux_hbeta` double, 
 `flux_oiii` double, 
 `flux_halpha` double, 
 `flux_err_oii` double, 
 `flux_err_hbeta` double, 
 `flux_err_oiii` double, 
 `flux_err_halpha` double, 
 `line_coverage_oii` double, 
 `line_coverage_hbeta` double, 
 `line_coverage_oiii` double, 
 `line_coverage_halpha` double)
   ROW FORMAT SERDE 
 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 
   STORED AS INPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 
   OUTPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
   LOCATION
 'hdfs://AntNest/user/tallada/data/pau/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2'
   ;

I create the internal table in CosmoHub:

   CREATE TABLE `pau_cosmos_photoz_v0_2_c`(
 `ref_id` bigint COMMENT 'PAUdm reference ID', 
 `i_auto` double COMMENT 'Auto i-band magnitude from Ilbert+09 cosmos photo-z catalog', 
 `photoz` double COMMENT 'Photo-z, defined as the mode of the p(z) of each object obtained from the templates', 
 `ra` double COMMENT 'Right ascention', 
 `dec` double COMMENT 'Declination', 
 `nbands` bigint COMMENT 'Number of bands used in the photo-z code', 
 `id_laigle` bigint COMMENT 'COSMOS2015 (Laigle+15) reference ID', 
 `zspec_mean` double COMMENT 'Average spectroscopic redshift from several public redshift surveys (when available)', 
 `zspec_std` double COMMENT 'Standard deviation spectroscopic redshift from several public redshift surveys (when available)', 
 `best_model` double COMMENT 'Best model in the photo-z code', 
 `best_extlaw` bigint COMMENT 'Best extinction law in the photo-z code (0:No extinction; 1:Prevot; 2:Calzetti; 3:Calzetti+Bump1; 4:Calzetti+Bump2', 
 `best_continuum` double COMMENT 'Best continuum template group (see Table 1 in Alarcon et.al)', 
 `best_ebv` double COMMENT 'Best extinction E(B-V) value (from 0 to 0.5, in steps of 0.05)', 
 `best_type` double COMMENT 'Best galaxy type (0: red, 1:green, 2:blue)', 
 `muv` double COMMENT 'Absolute magnitude Galex NUV band. Corrected for internal galaxy extinction', 
 `mu` double COMMENT 'Absolute magnitude CFHT U band. Corrected for internal galaxy extinction', 
 `mr` double COMMENT 'Absolute magnitude Subaru R band. Corrected for internal galaxy extinction', 
 `mi` double COMMENT 'Absolute magnitude Subaru I band. Corrected for internal galaxy extinction', 
 `flux_oii` double COMMENT 'Flux for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_hbeta` double COMMENT 'Flux for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_oiii` double COMMENT 'Flux for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_halpha` double COMMENT 'Flux for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction',  
 `flux_err_oii` double COMMENT 'Flux error for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_hbeta` double COMMENT 'Flux error for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_oiii` double COMMENT 'Flux error for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_halpha` double COMMENT 'Flux error for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `line_coverage_oii` double COMMENT 'Maximum coverage of the OII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_hbeta` double COMMENT 'Maximum coverage of the Hbeta flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_oiii` double COMMENT 'Maximum coverage of the OIII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_halpha` double COMMENT 'Maximum coverage of the Halpha flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged'
   )
   CLUSTERED BY ( 
   ref_id
   ) 
   SORTED BY ( 
   ref_id ASC
   ) 
   INTO 4 BUCKETS
   ROW FORMAT SERDE 
 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
   STORED AS INPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
   OUTPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
   ;
   INSERT OVERWRITE TABLE cosmohub.pau_cosmos_photoz_v0_2_c
   SELECT * FROM tallada.pau_cosmos_photoz_v0_2;
   ANALYZE TABLE cosmohub.pau_cosmos_photoz_v0_2_c COMPUTE STATISTICS FOR COLUMNS;


Copy the P(z) file into the value_added_data path:

   [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.2/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.2.parquet
   [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.2/PAU_COSMOS_photoz_catalog_PIC_PZ_v0.2_column_description.csv /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PIC_PZ_v0.2_column_description.csv


Version 0.3

Data are in:

   /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3

Copy to hdfs path:

   hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3/PAU_COSMOS_photoz_catalog_PIC_v0.3.parquet /user/jcarrete/data/paudm/paus_cosmos_photoz_v0.3/

Create external table:

   CREATE EXTERNAL TABLE jcarrete.pau_cosmos_photoz_v0_3_p (
   `ref_id` BIGINT,
   `I_auto` DOUBLE,
   `photoz` DOUBLE,
   `ra` DOUBLE,
   `dec` DOUBLE,
   `nbands` BIGINT,
   `id_laigle` BIGINT,
   `zspec_mean` DOUBLE,
   `zspec_std` DOUBLE,
   `best_model` DOUBLE,
   `best_extlaw` BIGINT,
   `best_continuum` DOUBLE,
   `best_ebv` DOUBLE,
   `best_type` DOUBLE,
   `MUV` DOUBLE,
   `MU` DOUBLE,
   `MR` DOUBLE,
   `MI` DOUBLE,
   `flux_OII` DOUBLE,
   `flux_Hbeta` DOUBLE,
   `flux_OIII` DOUBLE,
   `flux_Halpha` DOUBLE,
   `flux_err_OII` DOUBLE,
   `flux_err_Hbeta` DOUBLE,
   `flux_err_OIII` DOUBLE,
   `flux_err_Halpha` DOUBLE,
   `line_coverage_OII` DOUBLE,
   `line_coverage_Hbeta` DOUBLE,
   `line_coverage_OIII` DOUBLE,
   `line_coverage_Halpha` DOUBLE
   )
   STORED AS PARQUET
   LOCATION '/user/jcarrete/data/paudm/paus_cosmos_photoz_v0.3'
   ;
   CREATE TABLE cosmohub.pau_cosmos_photoz_v0_3_c(
 `ref_id` bigint COMMENT 'PAUdm reference ID', 
 `i_auto` double COMMENT 'Auto i-band magnitude from Ilbert+09 cosmos photo-z catalog', 
 `photoz` double COMMENT 'Photo-z, defined as the mode of the p(z) of each object obtained from the templates', 
 `ra` double COMMENT 'Right ascention', 
 `dec` double COMMENT 'Declination', 
 `nbands` bigint COMMENT 'Number of bands used in the photo-z code', 
 `id_laigle` bigint COMMENT 'COSMOS2015 (Laigle+15) reference ID', 
 `zspec_mean` double COMMENT 'Average spectroscopic redshift from several public redshift surveys (when available)', 
 `zspec_std` double COMMENT 'Standard deviation spectroscopic redshift from several public redshift surveys (when available)', 
 `best_model` double COMMENT 'Best model in the photo-z code', 
 `best_extlaw` bigint COMMENT 'Best extinction law in the photo-z code (0:No extinction\; 1:Prevot\; 2:Calzetti\; 3:Calzetti+Bump1\; 4:Calzetti+Bump2', 
 `best_continuum` double COMMENT 'Best continuum template group (see Table 1 in Alarcon et.al)', 
 `best_ebv` double COMMENT 'Best extinction E(B-V) value (from 0 to 0.5, in steps of 0.05)', 
 `best_type` double COMMENT 'Best galaxy type (0: red, 1:green, 2:blue)', 
 `muv` double COMMENT 'Absolute magnitude Galex NUV band. Corrected for internal galaxy extinction', 
 `mu` double COMMENT 'Absolute magnitude CFHT U band. Corrected for internal galaxy extinction', 
 `mr` double COMMENT 'Absolute magnitude Subaru R band. Corrected for internal galaxy extinction', 
 `mi` double COMMENT 'Absolute magnitude Subaru I band. Corrected for internal galaxy extinction', 
 `flux_oii` double COMMENT 'Flux for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_hbeta` double COMMENT 'Flux for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_oiii` double COMMENT 'Flux for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_halpha` double COMMENT 'Flux for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_oii` double COMMENT 'Flux error for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_hbeta` double COMMENT 'Flux error for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_oiii` double COMMENT 'Flux error for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_halpha` double COMMENT 'Flux error for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `line_coverage_oii` double COMMENT 'Maximum coverage of the OII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_hbeta` double COMMENT 'Maximum coverage of the Hbeta flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_oiii` double COMMENT 'Maximum coverage of the OIII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_halpha` double COMMENT 'Maximum coverage of the Halpha flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged')
   CLUSTERED BY ( 
 ref_id) 
   SORTED BY ( 
 ref_id ASC) 
   INTO 4 BUCKETS
   ROW FORMAT SERDE 
 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
   STORED AS INPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
   OUTPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
   ;
   INSERT OVERWRITE TABLE cosmohub.pau_cosmos_photoz_v0_3_c SELECT * FROM jcarrete.pau_cosmos_photoz_v0_3_p;
   ANALYZE TABLE cosmohub.pau_cosmos_photoz_v0_3_c COMPUTE STATISTICS FOR COLUMNS;
  [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.3/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.3.parquet


Version 0.4

Data are in:

   /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.4

Copy to hdfs path:

   hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.4/PAU_COSMOS_photoz_catalog_PIC_v0.4.parquet /user/jcarrete/data/paudm/paus_cosmos_photoz_v0.4/


   [jcarrete@data ~]$ /software/astro/scripts/create_table_from_parquet.sh -p /hdfs/user/jcarrete/data/paudm/paus_cosmos_photoz_v0.4/PAU_COSMOS_photoz_catalog_PIC_v0.4.parquet -t jcarrete.pau_cosmos_photoz_v0_4_p -e


   CREATE EXTERNAL TABLE jcarrete.pau_cosmos_photoz_v0_4_p (
   `ref_id` BIGINT,
   `I_auto` DOUBLE,
   `photoz` DOUBLE,
   `ra` DOUBLE,
   `dec` DOUBLE,
   `nbands` BIGINT,
   `id_laigle` BIGINT,
   `zspec_mean` DOUBLE,
   `zspec_std` DOUBLE,
   `best_model` DOUBLE,
   `best_extlaw` BIGINT,
   `best_continuum` DOUBLE,
   `best_ebv` DOUBLE,
   `best_type` DOUBLE,
   `MUV` DOUBLE,
   `MU` DOUBLE,
   `MR` DOUBLE,
   `MI` DOUBLE,
   `flux_OII` DOUBLE,
   `flux_Hbeta` DOUBLE,
   `flux_OIII` DOUBLE,
   `flux_Halpha` DOUBLE,
   `flux_err_OII` DOUBLE,
   `flux_err_Hbeta` DOUBLE,
   `flux_err_OIII` DOUBLE,
   `flux_err_Halpha` DOUBLE,
   `line_coverage_OII` DOUBLE,
   `line_coverage_Hbeta` DOUBLE,
   `line_coverage_OIII` DOUBLE,
   `line_coverage_Halpha` DOUBLE
   )
   STORED AS PARQUET
   LOCATION '/user/jcarrete/data/paudm/paus_cosmos_photoz_v0.4'
   ;


   CREATE TABLE cosmohub.pau_cosmos_photoz_v0_4_c(
 `ref_id` bigint COMMENT 'PAUdm reference ID', 
 `i_auto` double COMMENT 'Auto i-band magnitude from Ilbert+09 cosmos photo-z catalog', 
 `photoz` double COMMENT 'Photo-z, defined as the mode of the p(z) of each object obtained from the templates', 
 `ra` double COMMENT 'Right ascention', 
 `dec` double COMMENT 'Declination', 
 `nbands` bigint COMMENT 'Number of bands used in the photo-z code', 
 `id_laigle` bigint COMMENT 'COSMOS2015 (Laigle+15) reference ID', 
 `zspec_mean` double COMMENT 'Average spectroscopic redshift from several public redshift surveys (when available)', 
 `zspec_std` double COMMENT 'Standard deviation spectroscopic redshift from several public redshift surveys (when available)', 
 `best_model` double COMMENT 'Best model in the photo-z code', 
 `best_extlaw` bigint COMMENT 'Best extinction law in the photo-z code (0:No extinction\; 1:Prevot\; 2:Calzetti\; 3:Calzetti+Bump1\; 4:Calzetti+Bump2', 
 `best_continuum` double COMMENT 'Best continuum template group (see Table 1 in Alarcon et.al)', 
 `best_ebv` double COMMENT 'Best extinction E(B-V) value (from 0 to 0.5, in steps of 0.05)', 
 `best_type` double COMMENT 'Best galaxy type (0: red, 1:green, 2:blue)', 
 `muv` double COMMENT 'Absolute magnitude Galex NUV band. Corrected for internal galaxy extinction', 
 `mu` double COMMENT 'Absolute magnitude CFHT U band. Corrected for internal galaxy extinction', 
 `mr` double COMMENT 'Absolute magnitude Subaru R band. Corrected for internal galaxy extinction', 
 `mi` double COMMENT 'Absolute magnitude Subaru I band. Corrected for internal galaxy extinction', 
 `flux_oii` double COMMENT 'Flux for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_hbeta` double COMMENT 'Flux for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_oiii` double COMMENT 'Flux for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_halpha` double COMMENT 'Flux for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_oii` double COMMENT 'Flux error for the OII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_hbeta` double COMMENT 'Flux error for the Hbeta line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_oiii` double COMMENT 'Flux error for the OIII line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `flux_err_halpha` double COMMENT 'Flux error for the Halpha line template in units of 1e-17 erg/s/cm2. Corrected for internal galaxy extinction', 
 `line_coverage_oii` double COMMENT 'Maximum coverage of the OII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_hbeta` double COMMENT 'Maximum coverage of the Hbeta flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_oiii` double COMMENT 'Maximum coverage of the OIII flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged', 
 `line_coverage_halpha` double COMMENT 'Maximum coverage of the Halpha flux line. Range is [0,1], with 0 being poorly covered and 1 great coverage. Measurements with poor coverage are unreliable and should be flagged')
   CLUSTERED BY ( 
 ref_id) 
   SORTED BY ( 
 ref_id ASC) 
   INTO 4 BUCKETS
   ROW FORMAT SERDE 
 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
   STORED AS INPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
   OUTPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
   ;
   INSERT OVERWRITE TABLE cosmohub.pau_cosmos_photoz_v0_4_c SELECT * FROM jcarrete.pau_cosmos_photoz_v0_4_p;
   ANALYZE TABLE cosmohub.pau_cosmos_photoz_v0_4_c COMPUTE STATISTICS FOR COLUMNS;
  [jcarrete@data photoz]$ hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/alarcon/paucosmos_photoz_catalogs/v0.4/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.4.parquet /user/cosmohub/value_added_data/paudm/photoz/PAU_COSMOS_photoz_catalog_PZ_PIC_v0.4.parquet