Difference between revisions of "MIDES Wide Field v6.0"
		
		
		
		
		
		Jump to navigation
		Jump to search
		
				
		
		
	
|  (Created page with "== Input data ==  Materialize partitioned bata by redshift bins   CREATE TABLE apocino.des_sample(      coadd_object_id bigint,      sof_cm_mag_corrected_g double,      sof_cm...") | |||
| (One intermediate revision by one other user not shown) | |||
| Line 24: | Line 24: | ||
|   ; |   ; | ||
| − |   INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z=0 | + |   INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z=0) | 
|   SELECT |   SELECT | ||
|       coadd_object_id, |       coadd_object_id, | ||
| Line 40: | Line 40: | ||
|   AND sof_cm_mag_corrected_z < 30 |   AND sof_cm_mag_corrected_z < 30 | ||
|   AND dnf_zmean_sof != -9999 |   AND dnf_zmean_sof != -9999 | ||
| − |   AND dnf_zmc_sof > 0 AND dnf_zmc_sof < | + |   AND dnf_zmc_sof >= 0 AND dnf_zmc_sof < 0.1 | 
|   ; |   ; | ||
| − |   INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z=1. | + |   INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z=1.40) | 
|   SELECT |   SELECT | ||
|       coadd_object_id, |       coadd_object_id, | ||
| Line 59: | Line 59: | ||
|   AND sof_cm_mag_corrected_z < 30 |   AND sof_cm_mag_corrected_z < 30 | ||
|   AND dnf_zmean_sof != -9999 |   AND dnf_zmean_sof != -9999 | ||
| − |   AND dnf_zmc_sof > 1.40 AND dnf_zmc_sof < | + |   AND dnf_zmc_sof >= 1.40 AND dnf_zmc_sof < 1.42 | 
|   ; |   ; | ||
| Line 79: | Line 79: | ||
|   AND sof_cm_mag_corrected_z < 30 |   AND sof_cm_mag_corrected_z < 30 | ||
|   AND dnf_zmean_sof != -9999 |   AND dnf_zmean_sof != -9999 | ||
| − |   AND dnf_zmc_sof > 0.1 AND dnf_zmc_sof < | + |   AND dnf_zmc_sof >= 0.1 AND dnf_zmc_sof < 1.40 | 
|   ; |   ; | ||
| + | |||
| + | |||
| + | |||
| + | We decide to ingest the color part before having the photo-z results in order to be used by Eli: | ||
| + | |||
| + | |||
| + | The data is in a file created by Andrea Pocino: | ||
| + | |||
| + |     /nfs/astro/apocino/MICE2_resample_DES/MICE_remap_5000sq_by_001_redshift_bins_zmc_binning_i_cut_23.csv | ||
| + | |||
| + | which I copy in hdfs: | ||
| + | |||
| + |     hdfs dfs -put /nfs/astro/apocino/MICE2_resample_DES/MICE_remap_5000sq_by_001_redshift_bins_zmc_binning_i_cut_23.csv /user/jcarrete/data/des/MICE2_resampled/apocino/v6.0/ | ||
| + | |||
| + | <pre> | ||
| + | CREATE EXTERNAL TABLE `jcarrete.mice2_remap_photometry_sof_desy3_gold2_v6_0_csv`( | ||
| + |   `unique_gal_id` bigint COMMENT 'unique galaxy id', | ||
| + |   `z_cgal` double COMMENT 'galaxy true redshift from MICECATv2.0',  | ||
| + |   `g_sof` double COMMENT 'observed g-magnitude resampled from DES Y3Gold2 sof g magnitude',  | ||
| + |   `r_sof` double COMMENT 'observed r-magnitude resampled from DES Y3Gold2 sof r magnitude',  | ||
| + |   `i_sof` double COMMENT 'observed i-magnitude resampled from DES Y3Gold2 sof i magnitude',  | ||
| + |   `z_sof` double COMMENT 'observed z-magnitude resampled from DES Y3Gold2 sof z magnitude',  | ||
| + |   `g_sof_err` double COMMENT 'error of the resampled g-magnitude',  | ||
| + |   `r_sof_err` double COMMENT 'error of the resampled r-magnitude',  | ||
| + |   `i_sof_err` double COMMENT 'error of the resampled i-magnitude',  | ||
| + |   `z_sof_err` double COMMENT 'error of the resampled z-magnitude' | ||
| + | ) | ||
| + | ROW FORMAT SERDE  | ||
| + |   'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'  | ||
| + | WITH SERDEPROPERTIES (  | ||
| + |   'field.delim'=',',  | ||
| + |   'serialization.format'=',')  | ||
| + | STORED AS INPUTFORMAT  | ||
| + |   'org.apache.hadoop.mapred.TextInputFormat'  | ||
| + | OUTPUTFORMAT  | ||
| + |   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' | ||
| + | LOCATION | ||
| + |   'hdfs://namenode01/user/jcarrete/data/des/MICE2_resampled/apocino/v6.0/' | ||
| + | ; | ||
| + | </pre> | ||
| + | |||
| + | |||
| + | <pre> | ||
| + | CREATE TABLE `cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0`( | ||
| + |   `unique_gal_id` bigint COMMENT 'unique galaxy id', | ||
| + |   `z_cgal` double COMMENT 'galaxy true redshift from MICECATv2.0',  | ||
| + |   `g_sof` double COMMENT 'observed g-magnitude resampled from DES Y3Gold2 sof g magnitude',  | ||
| + |   `r_sof` double COMMENT 'observed r-magnitude resampled from DES Y3Gold2 sof r magnitude',  | ||
| + |   `i_sof` double COMMENT 'observed i-magnitude resampled from DES Y3Gold2 sof i magnitude',  | ||
| + |   `z_sof` double COMMENT 'observed z-magnitude resampled from DES Y3Gold2 sof z magnitude',  | ||
| + |   `g_sof_err` double COMMENT 'error of the resampled g-magnitude',  | ||
| + |   `r_sof_err` double COMMENT 'error of the resampled r-magnitude',  | ||
| + |   `i_sof_err` double COMMENT 'error of the resampled i-magnitude',  | ||
| + |   `z_sof_err` double COMMENT 'error of the resampled z-magnitude' | ||
| + | ) | ||
| + | CLUSTERED BY (  | ||
| + |   unique_gal_id)  | ||
| + | INTO 64 BUCKETS | ||
| + | ROW FORMAT SERDE  | ||
| + |   'org.apache.hadoop.hive.ql.io.orc.OrcSerde'  | ||
| + | STORED AS INPUTFORMAT  | ||
| + |   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'  | ||
| + | OUTPUTFORMAT  | ||
| + |   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' | ||
| + | ; | ||
| + | </pre> | ||
| + | |||
| + | <pre> | ||
| + | INSERT OVERWRITE TABLE cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0 | ||
| + | SELECT * | ||
| + | FROM jcarrete.mice2_remap_photometry_sof_desy3_gold2_v6_0_csv | ||
| + | WHERE unique_gal_id IS NOT NULL; | ||
| + | </pre> | ||
| + | |||
| + | <pre> | ||
| + | ANALYZE TABLE cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0 COMPUTE STATISTICS FOR COLUMNS; | ||
| + | </pre> | ||
| + | |||
| + | And then I include the metadata into CosmoHub. | ||
| + | The info is in an email from Andrea Pocino with subject "Consulta Cosmohub". | ||
Latest revision as of 14:41, 16 December 2019
Input data
Materialize partitioned bata by redshift bins
CREATE TABLE apocino.des_sample(
    coadd_object_id bigint,
    sof_cm_mag_corrected_g double,
    sof_cm_mag_corrected_r double,
    sof_cm_mag_corrected_i double,
    sof_cm_mag_corrected_z double,
    sof_cm_mag_err_g double,
    sof_cm_mag_err_r double,
    sof_cm_mag_err_i double,
    sof_cm_mag_err_z double
)
PARTITIONED BY (
    z float
)
CLUSTERED BY (
    coadd_object_id
)
INTO 1 BUCKETS
STORED AS PARQUET
;
INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z=0)
SELECT
    coadd_object_id,
    sof_cm_mag_corrected_g, sof_cm_mag_corrected_r, sof_cm_mag_corrected_i, sof_cm_mag_corrected_z,
    sof_cm_mag_err_g, sof_cm_mag_err_r, sof_cm_mag_err_i, sof_cm_mag_err_z
FROM cosmohub.des_y3_gold_v2_2_c
WHERE extended_class_sof = 3
AND ra BETWEEN 20 AND 40 AND dec BETWEEN -28 AND -13
AND sof_cm_mag_corrected_g - sof_cm_mag_corrected_r BETWEEN -1 AND 3
AND sof_cm_mag_corrected_r - sof_cm_mag_corrected_i BETWEEN -1 AND 2.5
AND sof_cm_mag_corrected_i - sof_cm_mag_corrected_z BETWEEN -1 AND 2
AND sof_cm_mag_corrected_g < 30
AND sof_cm_mag_corrected_r < 30
AND sof_cm_mag_corrected_i < 30
AND sof_cm_mag_corrected_z < 30
AND dnf_zmean_sof != -9999
AND dnf_zmc_sof >= 0 AND dnf_zmc_sof < 0.1
;
INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z=1.40)
SELECT
    coadd_object_id,
    sof_cm_mag_corrected_g, sof_cm_mag_corrected_r, sof_cm_mag_corrected_i, sof_cm_mag_corrected_z,
    sof_cm_mag_err_g, sof_cm_mag_err_r, sof_cm_mag_err_i, sof_cm_mag_err_z
FROM cosmohub.des_y3_gold_v2_2_c
WHERE extended_class_sof = 3
AND ra BETWEEN 20 AND 40 AND dec BETWEEN -28 AND -13
AND sof_cm_mag_corrected_g - sof_cm_mag_corrected_r BETWEEN -1 AND 3
AND sof_cm_mag_corrected_r - sof_cm_mag_corrected_i BETWEEN -1 AND 2.5
AND sof_cm_mag_corrected_i - sof_cm_mag_corrected_z BETWEEN -1 AND 2
AND sof_cm_mag_corrected_g < 30
AND sof_cm_mag_corrected_r < 30
AND sof_cm_mag_corrected_i < 30
AND sof_cm_mag_corrected_z < 30
AND dnf_zmean_sof != -9999
AND dnf_zmc_sof >= 1.40 AND dnf_zmc_sof < 1.42
;
INSERT OVERWRITE TABLE apocino.des_sample PARTITION(z)
SELECT
    coadd_object_id,
    sof_cm_mag_corrected_g, sof_cm_mag_corrected_r, sof_cm_mag_corrected_i, sof_cm_mag_corrected_z,
    sof_cm_mag_err_g, sof_cm_mag_err_r, sof_cm_mag_err_i, sof_cm_mag_err_z, 
    ROUND(dnf_zmc_sof, 2)
FROM cosmohub.des_y3_gold_v2_2_c
WHERE extended_class_sof = 3
AND ra BETWEEN 20 AND 40 AND dec BETWEEN -28 AND -13
AND sof_cm_mag_corrected_g - sof_cm_mag_corrected_r BETWEEN -1 AND 3
AND sof_cm_mag_corrected_r - sof_cm_mag_corrected_i BETWEEN -1 AND 2.5
AND sof_cm_mag_corrected_i - sof_cm_mag_corrected_z BETWEEN -1 AND 2
AND sof_cm_mag_corrected_g < 30
AND sof_cm_mag_corrected_r < 30
AND sof_cm_mag_corrected_i < 30
AND sof_cm_mag_corrected_z < 30
AND dnf_zmean_sof != -9999
AND dnf_zmc_sof >= 0.1 AND dnf_zmc_sof < 1.40
;
We decide to ingest the color part before having the photo-z results in order to be used by Eli:
The data is in a file created by Andrea Pocino:
/nfs/astro/apocino/MICE2_resample_DES/MICE_remap_5000sq_by_001_redshift_bins_zmc_binning_i_cut_23.csv
which I copy in hdfs:
hdfs dfs -put /nfs/astro/apocino/MICE2_resample_DES/MICE_remap_5000sq_by_001_redshift_bins_zmc_binning_i_cut_23.csv /user/jcarrete/data/des/MICE2_resampled/apocino/v6.0/
CREATE EXTERNAL TABLE `jcarrete.mice2_remap_photometry_sof_desy3_gold2_v6_0_csv`( `unique_gal_id` bigint COMMENT 'unique galaxy id', `z_cgal` double COMMENT 'galaxy true redshift from MICECATv2.0', `g_sof` double COMMENT 'observed g-magnitude resampled from DES Y3Gold2 sof g magnitude', `r_sof` double COMMENT 'observed r-magnitude resampled from DES Y3Gold2 sof r magnitude', `i_sof` double COMMENT 'observed i-magnitude resampled from DES Y3Gold2 sof i magnitude', `z_sof` double COMMENT 'observed z-magnitude resampled from DES Y3Gold2 sof z magnitude', `g_sof_err` double COMMENT 'error of the resampled g-magnitude', `r_sof_err` double COMMENT 'error of the resampled r-magnitude', `i_sof_err` double COMMENT 'error of the resampled i-magnitude', `z_sof_err` double COMMENT 'error of the resampled z-magnitude' ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'field.delim'=',', 'serialization.format'=',') STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' LOCATION 'hdfs://namenode01/user/jcarrete/data/des/MICE2_resampled/apocino/v6.0/' ;
CREATE TABLE `cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0`( `unique_gal_id` bigint COMMENT 'unique galaxy id', `z_cgal` double COMMENT 'galaxy true redshift from MICECATv2.0', `g_sof` double COMMENT 'observed g-magnitude resampled from DES Y3Gold2 sof g magnitude', `r_sof` double COMMENT 'observed r-magnitude resampled from DES Y3Gold2 sof r magnitude', `i_sof` double COMMENT 'observed i-magnitude resampled from DES Y3Gold2 sof i magnitude', `z_sof` double COMMENT 'observed z-magnitude resampled from DES Y3Gold2 sof z magnitude', `g_sof_err` double COMMENT 'error of the resampled g-magnitude', `r_sof_err` double COMMENT 'error of the resampled r-magnitude', `i_sof_err` double COMMENT 'error of the resampled i-magnitude', `z_sof_err` double COMMENT 'error of the resampled z-magnitude' ) CLUSTERED BY ( unique_gal_id) INTO 64 BUCKETS ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' ;
INSERT OVERWRITE TABLE cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0 SELECT * FROM jcarrete.mice2_remap_photometry_sof_desy3_gold2_v6_0_csv WHERE unique_gal_id IS NOT NULL;
ANALYZE TABLE cosmohub.mice2_remap_photometry_sof_desy3_gold2_v6_0 COMPUTE STATISTICS FOR COLUMNS;
And then I include the metadata into CosmoHub. The info is in an email from Andrea Pocino with subject "Consulta Cosmohub".