Difference between revisions of "MIDES Wide Field v6.0"

From Public PIC Wiki
Jump to navigation Jump to search
 
Line 81: Line 81:
 
  AND dnf_zmc_sof >= 0.1 AND dnf_zmc_sof < 1.40
 
  AND dnf_zmc_sof >= 0.1 AND dnf_zmc_sof < 1.40
 
  ;
 
  ;
 +
 +
 +
 +
We decide to ingest the color part before having the photo-z results in order to be used by Eli:
 +
 +
 +
The data is in a file created by Andrea Pocino:
 +
 +
    /nfs/astro/apocino/MICE2_resample_DES/MICE_remap_5000sq_by_001_redshift_bins_zmc_binning_i_cut_23.csv
 +
 +
which I copy in hdfs:
 +
 +
    hdfs dfs -put /nfs/astro/apocino/MICE2_resample_DES/MICE_remap_5000sq_by_001_redshift_bins_zmc_binning_i_cut_23.csv /user/jcarrete/data/des/MICE2_resampled/apocino/v6.0/
 +
 +
<pre>
 +
CREATE EXTERNAL TABLE `jcarrete.mice2_remap_photometry_sof_desy3_gold2_v6_0_csv`(
 +
  `unique_gal_id` bigint COMMENT 'unique galaxy id',
 +
  `z_cgal` double COMMENT 'galaxy true redshift from MICECATv2.0',
 +
  `g_sof` double COMMENT 'observed g-magnitude resampled from DES Y3Gold2 sof g magnitude',
 +
  `r_sof` double COMMENT 'observed r-magnitude resampled from DES Y3Gold2 sof r magnitude',
 +
  `i_sof` double COMMENT 'observed i-magnitude resampled from DES Y3Gold2 sof i magnitude',
 +
  `z_sof` double COMMENT 'observed z-magnitude resampled from DES Y3Gold2 sof z magnitude',
 +
  `g_sof_err` double COMMENT 'error of the resampled g-magnitude',
 +
  `r_sof_err` double COMMENT 'error of the resampled r-magnitude',
 +
  `i_sof_err` double COMMENT 'error of the resampled i-magnitude',
 +
  `z_sof_err` double COMMENT 'error of the resampled z-magnitude'
 +
)
 +
ROW FORMAT SERDE
 +
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
 +
WITH SERDEPROPERTIES (
 +
  'field.delim'=',',
 +
  'serialization.format'=',')
 +
STORED AS INPUTFORMAT
 +
  'org.apache.hadoop.mapred.TextInputFormat'
 +
OUTPUTFORMAT
 +
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
 +
LOCATION
 +
  'hdfs://namenode01/user/jcarrete/data/des/MICE2_resampled/apocino/v6.0/'
 +
;
 +
</pre>
 +
 +
 +
<pre>
 +
CREATE TABLE `cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0`(
 +
  `unique_gal_id` bigint COMMENT 'unique galaxy id',
 +
  `z_cgal` double COMMENT 'galaxy true redshift from MICECATv2.0',
 +
  `g_sof` double COMMENT 'observed g-magnitude resampled from DES Y3Gold2 sof g magnitude',
 +
  `r_sof` double COMMENT 'observed r-magnitude resampled from DES Y3Gold2 sof r magnitude',
 +
  `i_sof` double COMMENT 'observed i-magnitude resampled from DES Y3Gold2 sof i magnitude',
 +
  `z_sof` double COMMENT 'observed z-magnitude resampled from DES Y3Gold2 sof z magnitude',
 +
  `g_sof_err` double COMMENT 'error of the resampled g-magnitude',
 +
  `r_sof_err` double COMMENT 'error of the resampled r-magnitude',
 +
  `i_sof_err` double COMMENT 'error of the resampled i-magnitude',
 +
  `z_sof_err` double COMMENT 'error of the resampled z-magnitude'
 +
)
 +
CLUSTERED BY (
 +
  unique_gal_id)
 +
INTO 64 BUCKETS
 +
ROW FORMAT SERDE
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
 +
STORED AS INPUTFORMAT
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
 +
OUTPUTFORMAT
 +
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
 +
;
 +
</pre>
 +
 +
<pre>
 +
INSERT OVERWRITE TABLE cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0
 +
SELECT *
 +
FROM jcarrete.mice2_remap_photometry_sof_desy3_gold2_v6_0_csv
 +
WHERE unique_gal_id IS NOT NULL;
 +
</pre>
 +
 +
<pre>
 +
ANALYZE TABLE cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0 COMPUTE STATISTICS FOR COLUMNS;
 +
</pre>
 +
 +
And then I include the metadata into CosmoHub.
 +
The info is in an email from Andrea Pocino with subject "Consulta Cosmohub".

Latest revision as of 14:41, 16 December 2019

Input data

Materialize partitioned bata by redshift bins

CREATE TABLE apocino.des_sample(
    coadd_object_id bigint,
    sof_cm_mag_corrected_g double,
    sof_cm_mag_corrected_r double,
    sof_cm_mag_corrected_i double,
    sof_cm_mag_corrected_z double,
    sof_cm_mag_err_g double,
    sof_cm_mag_err_r double,
    sof_cm_mag_err_i double,
    sof_cm_mag_err_z double
)
PARTITIONED BY (
    z float
)
CLUSTERED BY (
    coadd_object_id
)
INTO 1 BUCKETS
STORED AS PARQUET
;
INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z=0)
SELECT
    coadd_object_id,
    sof_cm_mag_corrected_g, sof_cm_mag_corrected_r, sof_cm_mag_corrected_i, sof_cm_mag_corrected_z,
    sof_cm_mag_err_g, sof_cm_mag_err_r, sof_cm_mag_err_i, sof_cm_mag_err_z
FROM cosmohub.des_y3_gold_v2_2_c
WHERE extended_class_sof = 3
AND ra BETWEEN 20 AND 40 AND dec BETWEEN -28 AND -13
AND sof_cm_mag_corrected_g - sof_cm_mag_corrected_r BETWEEN -1 AND 3
AND sof_cm_mag_corrected_r - sof_cm_mag_corrected_i BETWEEN -1 AND 2.5
AND sof_cm_mag_corrected_i - sof_cm_mag_corrected_z BETWEEN -1 AND 2
AND sof_cm_mag_corrected_g < 30
AND sof_cm_mag_corrected_r < 30
AND sof_cm_mag_corrected_i < 30
AND sof_cm_mag_corrected_z < 30
AND dnf_zmean_sof != -9999
AND dnf_zmc_sof >= 0 AND dnf_zmc_sof < 0.1
;
INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z=1.40)
SELECT
    coadd_object_id,
    sof_cm_mag_corrected_g, sof_cm_mag_corrected_r, sof_cm_mag_corrected_i, sof_cm_mag_corrected_z,
    sof_cm_mag_err_g, sof_cm_mag_err_r, sof_cm_mag_err_i, sof_cm_mag_err_z
FROM cosmohub.des_y3_gold_v2_2_c
WHERE extended_class_sof = 3
AND ra BETWEEN 20 AND 40 AND dec BETWEEN -28 AND -13
AND sof_cm_mag_corrected_g - sof_cm_mag_corrected_r BETWEEN -1 AND 3
AND sof_cm_mag_corrected_r - sof_cm_mag_corrected_i BETWEEN -1 AND 2.5
AND sof_cm_mag_corrected_i - sof_cm_mag_corrected_z BETWEEN -1 AND 2
AND sof_cm_mag_corrected_g < 30
AND sof_cm_mag_corrected_r < 30
AND sof_cm_mag_corrected_i < 30
AND sof_cm_mag_corrected_z < 30
AND dnf_zmean_sof != -9999
AND dnf_zmc_sof >= 1.40 AND dnf_zmc_sof < 1.42
;
INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z)
SELECT
    coadd_object_id,
    sof_cm_mag_corrected_g, sof_cm_mag_corrected_r, sof_cm_mag_corrected_i, sof_cm_mag_corrected_z,
    sof_cm_mag_err_g, sof_cm_mag_err_r, sof_cm_mag_err_i, sof_cm_mag_err_z, 
    ROUND(dnf_zmc_sof, 2)
FROM cosmohub.des_y3_gold_v2_2_c
WHERE extended_class_sof = 3
AND ra BETWEEN 20 AND 40 AND dec BETWEEN -28 AND -13
AND sof_cm_mag_corrected_g - sof_cm_mag_corrected_r BETWEEN -1 AND 3
AND sof_cm_mag_corrected_r - sof_cm_mag_corrected_i BETWEEN -1 AND 2.5
AND sof_cm_mag_corrected_i - sof_cm_mag_corrected_z BETWEEN -1 AND 2
AND sof_cm_mag_corrected_g < 30
AND sof_cm_mag_corrected_r < 30
AND sof_cm_mag_corrected_i < 30
AND sof_cm_mag_corrected_z < 30
AND dnf_zmean_sof != -9999
AND dnf_zmc_sof >= 0.1 AND dnf_zmc_sof < 1.40
;


We decide to ingest the color part before having the photo-z results in order to be used by Eli:


The data is in a file created by Andrea Pocino:

   /nfs/astro/apocino/MICE2_resample_DES/MICE_remap_5000sq_by_001_redshift_bins_zmc_binning_i_cut_23.csv

which I copy in hdfs:

   hdfs dfs -put /nfs/astro/apocino/MICE2_resample_DES/MICE_remap_5000sq_by_001_redshift_bins_zmc_binning_i_cut_23.csv /user/jcarrete/data/des/MICE2_resampled/apocino/v6.0/
CREATE EXTERNAL TABLE `jcarrete.mice2_remap_photometry_sof_desy3_gold2_v6_0_csv`(
  `unique_gal_id` bigint COMMENT 'unique galaxy id',
  `z_cgal` double COMMENT 'galaxy true redshift from MICECATv2.0', 
  `g_sof` double COMMENT 'observed g-magnitude resampled from DES Y3Gold2 sof g magnitude', 
  `r_sof` double COMMENT 'observed r-magnitude resampled from DES Y3Gold2 sof r magnitude', 
  `i_sof` double COMMENT 'observed i-magnitude resampled from DES Y3Gold2 sof i magnitude', 
  `z_sof` double COMMENT 'observed z-magnitude resampled from DES Y3Gold2 sof z magnitude', 
  `g_sof_err` double COMMENT 'error of the resampled g-magnitude', 
  `r_sof_err` double COMMENT 'error of the resampled r-magnitude', 
  `i_sof_err` double COMMENT 'error of the resampled i-magnitude', 
  `z_sof_err` double COMMENT 'error of the resampled z-magnitude'
)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
WITH SERDEPROPERTIES ( 
  'field.delim'=',', 
  'serialization.format'=',') 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://namenode01/user/jcarrete/data/des/MICE2_resampled/apocino/v6.0/'
;


CREATE TABLE `cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0`(
  `unique_gal_id` bigint COMMENT 'unique galaxy id',
  `z_cgal` double COMMENT 'galaxy true redshift from MICECATv2.0', 
  `g_sof` double COMMENT 'observed g-magnitude resampled from DES Y3Gold2 sof g magnitude', 
  `r_sof` double COMMENT 'observed r-magnitude resampled from DES Y3Gold2 sof r magnitude', 
  `i_sof` double COMMENT 'observed i-magnitude resampled from DES Y3Gold2 sof i magnitude', 
  `z_sof` double COMMENT 'observed z-magnitude resampled from DES Y3Gold2 sof z magnitude', 
  `g_sof_err` double COMMENT 'error of the resampled g-magnitude', 
  `r_sof_err` double COMMENT 'error of the resampled r-magnitude', 
  `i_sof_err` double COMMENT 'error of the resampled i-magnitude', 
  `z_sof_err` double COMMENT 'error of the resampled z-magnitude'
)
CLUSTERED BY ( 
  unique_gal_id) 
INTO 64 BUCKETS
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
;
INSERT OVERWRITE TABLE cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0
SELECT *
FROM jcarrete.mice2_remap_photometry_sof_desy3_gold2_v6_0_csv
WHERE unique_gal_id IS NOT NULL;
ANALYZE TABLE cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0 COMPUTE STATISTICS FOR COLUMNS;

And then I include the metadata into CosmoHub. The info is in an email from Andrea Pocino with subject "Consulta Cosmohub".