Legacy Survey DR8
Raw data
Downloaded from
ftp://archive.noao.edu:/public/hlsp/ls/dr8/
Directories:
south/tractor north/tractor
Mirrored using lftp commands:
mirror -c -v -P 200 north/tractor/ /mnt/pau/north/tractor/ mirror -c -v -P 200 south/tractor/ /mnt/pau/south/tractor/
into a 5 TiB temporary Ceph filesystem mounted in tdm002.
Checksums provided through sha256sum files, and checked using command:
find * -type d | xargs -t -L1 -P24 sh -c 'cd $1; sha256sum --quiet -c *.sha256sum' cksum
Parquet
The following code snippet was used in spark to convert fits files to parquet:
path_base = '/cephfs/pic.es/cosmohub/legacysurvey' hdfs_base = '/user/tallada/data/legacysurvey' hs = ['h=north', 'h=south'] rs = ['r={0:03d}'.format(x) for x in range(360)] for h in hs: for r in rs: p = os.path.join(path_base, h, r) if not os.path.exists(p): continue size = sum(os.path.getsize(os.path.join(p,f)) for f in os.listdir(p) if os.path.isfile(os.path.join(p,f))) print h, r, size df = spark.read.format("fits").option("hdu", 1).load( "file://" + p ) df = df.repartition(int(math.ceil(size / (256*1024*1024.0)))) df.write.parquet(os.path.join(hdfs_base, h, r))
External table
CREATE EXTERNAL TABLE tallada.dr8 ( `release` smallint, `brickid` int, `brickname` string, `objid` int, `brick_primary` boolean, `brightblob` smallint, `maskbits` smallint, `type` string, `ra` double, `dec` double, `ra_ivar` float, `dec_ivar` float, `bx` float, `by` float, `dchisq` array<float>, `ebv` float, `mjd_min` double, `mjd_max` double, `ref_cat` string, `ref_id` bigint, `pmra` float, `pmdec` float, `parallax` float, `pmra_ivar` float, `pmdec_ivar` float, `parallax_ivar` float, `ref_epoch` float, `gaia_pointsource` boolean, `gaia_phot_g_mean_mag` float, `gaia_phot_g_mean_flux_over_error` float, `gaia_phot_g_n_obs` smallint, `gaia_phot_bp_mean_mag` float, `gaia_phot_bp_mean_flux_over_error` float, `gaia_phot_bp_n_obs` smallint, `gaia_phot_rp_mean_mag` float, `gaia_phot_rp_mean_flux_over_error` float, `gaia_phot_rp_n_obs` smallint, `gaia_phot_variable_flag` boolean, `gaia_astrometric_excess_noise` float, `gaia_astrometric_excess_noise_sig` float, `gaia_astrometric_n_obs_al` smallint, `gaia_astrometric_n_good_obs_al` smallint, `gaia_astrometric_weight_al` float, `gaia_duplicated_source` boolean, `gaia_a_g_val` float, `gaia_e_bp_min_rp_val` float, `gaia_phot_bp_rp_excess_factor` float, `gaia_astrometric_sigma5d_max` float, `gaia_astrometric_params_solved` tinyint, `flux_g` float, `flux_r` float, `flux_z` float, `flux_w1` float, `flux_w2` float, `flux_w3` float, `flux_w4` float, `flux_ivar_g` float, `flux_ivar_r` float, `flux_ivar_z` float, `flux_ivar_w1` float, `flux_ivar_w2` float, `flux_ivar_w3` float, `flux_ivar_w4` float, `fiberflux_g` float, `fiberflux_r` float, `fiberflux_z` float, `fibertotflux_g` float, `fibertotflux_r` float, `fibertotflux_z` float, `apflux_g` array<float>, `apflux_r` array<float>, `apflux_z` array<float>, `apflux_resid_g` array<float>, `apflux_resid_r` array<float>, `apflux_resid_z` array<float>, `apflux_ivar_g` array<float>, `apflux_ivar_r` array<float>, `apflux_ivar_z` array<float>, `mw_transmission_g` float, `mw_transmission_r` float, `mw_transmission_z` float, `mw_transmission_w1` float, `mw_transmission_w2` float, `mw_transmission_w3` float, `mw_transmission_w4` float, `nobs_g` smallint, `nobs_r` smallint, `nobs_z` smallint, `nobs_w1` smallint, `nobs_w2` smallint, `nobs_w3` smallint, `nobs_w4` smallint, `rchisq_g` float, `rchisq_r` float, `rchisq_z` float, `rchisq_w1` float, `rchisq_w2` float, `rchisq_w3` float, `rchisq_w4` float, `fracflux_g` float, `fracflux_r` float, `fracflux_z` float, `fracflux_w1` float, `fracflux_w2` float, `fracflux_w3` float, `fracflux_w4` float, `fracmasked_g` float, `fracmasked_r` float, `fracmasked_z` float, `fracin_g` float, `fracin_r` float, `fracin_z` float, `anymask_g` smallint, `anymask_r` smallint, `anymask_z` smallint, `allmask_g` smallint, `allmask_r` smallint, `allmask_z` smallint, `wisemask_w1` tinyint, `wisemask_w2` tinyint, `psfsize_g` float, `psfsize_r` float, `psfsize_z` float, `psfdepth_g` float, `psfdepth_r` float, `psfdepth_z` float, `galdepth_g` float, `galdepth_r` float, `galdepth_z` float, `psfdepth_w1` float, `psfdepth_w2` float, `psfdepth_w3` float, `psfdepth_w4` float, `wise_coadd_id` string, `lc_flux_w1` array<float>, `lc_flux_w2` array<float>, `lc_flux_ivar_w1` array<float>, `lc_flux_ivar_w2` array<float>, `lc_nobs_w1` array<smallint>, `lc_nobs_w2` array<smallint>, `lc_fracflux_w1` array<float>, `lc_fracflux_w2` array<float>, `lc_rchisq_w1` array<float>, `lc_rchisq_w2` array<float>, `lc_mjd_w1` array<double>, `lc_mjd_w2` array<double>, `fracdev` float, `fracdev_ivar` float, `shapeexp_r` float, `shapeexp_r_ivar` float, `shapeexp_e1` float, `shapeexp_e1_ivar` float, `shapeexp_e2` float, `shapeexp_e2_ivar` float, `shapedev_r` float, `shapedev_r_ivar` float, `shapedev_e1` float, `shapedev_e1_ivar` float, `shapedev_e2` float, `shapedev_e2_ivar` float ) PARTITIONED BY ( h string, r string ) STORED AS PARQUET LOCATION '/user/tallada/data/legacysurvey/dr8/' ;
Internal Table
CREATE TABLE cosmohub.legacy_survey_dr8 ( `release` smallint, `brickid` int, `brickname` string, `objid` int, `brick_primary` boolean, `brightblob` smallint, `maskbits` smallint, `type` string, `ra` double, `dec` double, `ra_ivar` float, `dec_ivar` float, `bx` float, `by` float, `dchisq` array<float>, `ebv` float, `mjd_min` double, `mjd_max` double, `ref_cat` string, `ref_id` bigint, `pmra` float, `pmdec` float, `parallax` float, `pmra_ivar` float, `pmdec_ivar` float, `parallax_ivar` float, `ref_epoch` float, `gaia_pointsource` boolean, `gaia_phot_g_mean_mag` float, `gaia_phot_g_mean_flux_over_error` float, `gaia_phot_g_n_obs` smallint, `gaia_phot_bp_mean_mag` float, `gaia_phot_bp_mean_flux_over_error` float, `gaia_phot_bp_n_obs` smallint, `gaia_phot_rp_mean_mag` float, `gaia_phot_rp_mean_flux_over_error` float, `gaia_phot_rp_n_obs` smallint, `gaia_phot_variable_flag` boolean, `gaia_astrometric_excess_noise` float, `gaia_astrometric_excess_noise_sig` float, `gaia_astrometric_n_obs_al` smallint, `gaia_astrometric_n_good_obs_al` smallint, `gaia_astrometric_weight_al` float, `gaia_duplicated_source` boolean, `gaia_a_g_val` float, `gaia_e_bp_min_rp_val` float, `gaia_phot_bp_rp_excess_factor` float, `gaia_astrometric_sigma5d_max` float, `gaia_astrometric_params_solved` tinyint, `flux_g` float, `flux_r` float, `flux_z` float, `flux_w1` float, `flux_w2` float, `flux_w3` float, `flux_w4` float, `flux_ivar_g` float, `flux_ivar_r` float, `flux_ivar_z` float, `flux_ivar_w1` float, `flux_ivar_w2` float, `flux_ivar_w3` float, `flux_ivar_w4` float, `fiberflux_g` float, `fiberflux_r` float, `fiberflux_z` float, `fibertotflux_g` float, `fibertotflux_r` float, `fibertotflux_z` float, `apflux_g` array<float>, `apflux_r` array<float>, `apflux_z` array<float>, `apflux_resid_g` array<float>, `apflux_resid_r` array<float>, `apflux_resid_z` array<float>, `apflux_ivar_g` array<float>, `apflux_ivar_r` array<float>, `apflux_ivar_z` array<float>, `mw_transmission_g` float, `mw_transmission_r` float, `mw_transmission_z` float, `mw_transmission_w1` float, `mw_transmission_w2` float, `mw_transmission_w3` float, `mw_transmission_w4` float, `nobs_g` smallint, `nobs_r` smallint, `nobs_z` smallint, `nobs_w1` smallint, `nobs_w2` smallint, `nobs_w3` smallint, `nobs_w4` smallint, `rchisq_g` float, `rchisq_r` float, `rchisq_z` float, `rchisq_w1` float, `rchisq_w2` float, `rchisq_w3` float, `rchisq_w4` float, `fracflux_g` float, `fracflux_r` float, `fracflux_z` float, `fracflux_w1` float, `fracflux_w2` float, `fracflux_w3` float, `fracflux_w4` float, `fracmasked_g` float, `fracmasked_r` float, `fracmasked_z` float, `fracin_g` float, `fracin_r` float, `fracin_z` float, `anymask_g` smallint, `anymask_r` smallint, `anymask_z` smallint, `allmask_g` smallint, `allmask_r` smallint, `allmask_z` smallint, `wisemask_w1` tinyint, `wisemask_w2` tinyint, `psfsize_g` float, `psfsize_r` float, `psfsize_z` float, `psfdepth_g` float, `psfdepth_r` float, `psfdepth_z` float, `galdepth_g` float, `galdepth_r` float, `galdepth_z` float, `psfdepth_w1` float, `psfdepth_w2` float, `psfdepth_w3` float, `psfdepth_w4` float, `wise_coadd_id` string, `lc_flux_w1` array<float>, `lc_flux_w2` array<float>, `lc_flux_ivar_w1` array<float>, `lc_flux_ivar_w2` array<float>, `lc_nobs_w1` array<smallint>, `lc_nobs_w2` array<smallint>, `lc_fracflux_w1` array<float>, `lc_fracflux_w2` array<float>, `lc_rchisq_w1` array<float>, `lc_rchisq_w2` array<float>, `lc_mjd_w1` array<double>, `lc_mjd_w2` array<double>, `fracdev` float, `fracdev_ivar` float, `shapeexp_r` float, `shapeexp_r_ivar` float, `shapeexp_e1` float, `shapeexp_e1_ivar` float, `shapeexp_e2` float, `shapeexp_e2_ivar` float, `shapedev_r` float, `shapedev_r_ivar` float, `shapedev_e1` float, `shapedev_e1_ivar` float, `shapedev_e2` float, `shapedev_e2_ivar` float ) clustered by ( release, brickid, objid ) sorted by ( release, brickid, objid ) into 4096 buckets stored as orc ; insert overwrite table cosmohub.legacy_survey_dr8 select release, brickid, brickname, objid, brick_primary, brightblob, maskbits, type, ra, dec, ra_ivar, dec_ivar, bx, `by`, dchisq, ebv, mjd_min, mjd_max, ref_cat, ref_id, pmra, pmdec, parallax, pmra_ivar, pmdec_ivar, parallax_ivar, ref_epoch, gaia_pointsource, gaia_phot_g_mean_mag, gaia_phot_g_mean_flux_over_error, gaia_phot_g_n_obs, gaia_phot_bp_mean_mag, gaia_phot_bp_mean_flux_over_error, gaia_phot_bp_n_obs, gaia_phot_rp_mean_mag, gaia_phot_rp_mean_flux_over_error, gaia_phot_rp_n_obs, gaia_phot_variable_flag, gaia_astrometric_excess_noise, gaia_astrometric_excess_noise_sig, gaia_astrometric_n_obs_al, gaia_astrometric_n_good_obs_al, gaia_astrometric_weight_al, gaia_duplicated_source, gaia_a_g_val, gaia_e_bp_min_rp_val, gaia_phot_bp_rp_excess_factor, gaia_astrometric_sigma5d_max, gaia_astrometric_params_solved, flux_g, flux_r, flux_z, flux_w1, flux_w2, flux_w3, flux_w4, flux_ivar_g, flux_ivar_r, flux_ivar_z, flux_ivar_w1, flux_ivar_w2, flux_ivar_w3, flux_ivar_w4, fiberflux_g, fiberflux_r, fiberflux_z, fibertotflux_g, fibertotflux_r, fibertotflux_z, apflux_g, apflux_r, apflux_z, apflux_resid_g, apflux_resid_r, apflux_resid_z, apflux_ivar_g, apflux_ivar_r, apflux_ivar_z, mw_transmission_g, mw_transmission_r, mw_transmission_z, mw_transmission_w1, mw_transmission_w2, mw_transmission_w3, mw_transmission_w4, nobs_g, nobs_r, nobs_z, nobs_w1, nobs_w2, nobs_w3, nobs_w4, rchisq_g, rchisq_r, rchisq_z, rchisq_w1, rchisq_w2, rchisq_w3, rchisq_w4, fracflux_g, fracflux_r, fracflux_z, fracflux_w1, fracflux_w2, fracflux_w3, fracflux_w4, fracmasked_g, fracmasked_r, fracmasked_z, fracin_g, fracin_r, fracin_z, anymask_g, anymask_r, anymask_z, allmask_g, allmask_r, allmask_z, wisemask_w1, wisemask_w2, psfsize_g, psfsize_r, psfsize_z, psfdepth_g, psfdepth_r, psfdepth_z, galdepth_g, galdepth_r, galdepth_z, psfdepth_w1, psfdepth_w2, psfdepth_w3, psfdepth_w4, wise_coadd_id, lc_flux_w1, lc_flux_w2, lc_flux_ivar_w1, lc_flux_ivar_w2, lc_nobs_w1, lc_nobs_w2, lc_fracflux_w1, lc_fracflux_w2, lc_rchisq_w1, lc_rchisq_w2, lc_mjd_w1, lc_mjd_w2, fracdev, fracdev_ivar, shapeexp_r, shapeexp_r_ivar, shapeexp_e1, shapeexp_e1_ivar, shapeexp_e2, shapeexp_e2_ivar, shapedev_r, shapedev_r_ivar, shapedev_e1, shapedev_e1_ivar, shapedev_e2, shapedev_e2_ivar from tallada.dr8 ;