Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,8 @@
*.egg-info
build/
simple/version.py

scripts/spectra_convert/Zhang18/sty2054_supplemental_files/*
scripts/spectra_convert/Zhang18/SIMPLE/*
.gitignore
scripts/spectra_convert/Zhang18/primeval1-4spextxt.zip
scripts/spectra_convert/Zhang18/sty2054_supplemental_files.zip
3,647 changes: 3,647 additions & 0 deletions scripts/ingests/Zhang18/convert_spectra_zhang18.ipynb

Large diffs are not rendered by default.

94 changes: 94 additions & 0 deletions scripts/ingests/Zhang18/create_spectra_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from astropy.io import fits
from astropy.time import Time
import os
import csv


# Build a CSV describing every FITS spectrum in the Zhang18 supplemental
# directory, one row per file, for use by the ingest step.
#
# NOTE(review): both paths are absolute paths on the author's machine, and
# each row's "access_url" is the local file path built from `path` -- confirm
# this is what the ingest step expects before running elsewhere.
path = "/Users/guanying/SIMPLE Archieve/SIMPLE-db/scripts/ingests/Zhang18/sty2054_supplemental_files"

csv_file = "/Users/guanying/SIMPLE Archieve/SIMPLE-db/scripts/ingests/Zhang18/zhang18_spectra.csv"

csv_rows = []

# Sorted so that the row order of the CSV is reproducible between runs
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(path)):
    if not filename.endswith(".fits"):
        continue

    file_path = os.path.join(path, filename)
    print(f"\nReading: {filename}")

    # The source name is the first two underscore-separated filename tokens.
    sourceName = "_".join(filename.split("_")[0:2])
    filename_lower = filename.lower()

    # Get metadata from the primary header; missing keywords yield None.
    with fits.open(file_path) as hdul:
        header = hdul[0].header
        instrument_ = header.get("INSTRUME", None)
        telescope_ = header.get("TELESCOP", None)
        obs_date_ = header.get("DATE-OBS", None)
        mode_ = header.get("INSMODE", None)

    # Fill in / normalise values using the header or, failing that, the
    # instrument name embedded in the filename.
    if instrument_ == "XSHOOTER" or "xshooter" in filename_lower:
        instrument_ = "XShooter"
        telescope_ = "ESO VLT"
        mode_ = "Echelle"
    elif instrument_ == "OSIRIS" or "osiris" in filename_lower:
        instrument_ = "OSIRIS"
        telescope_ = "GTC"
        mode_ = "Missing"

    # The regime is not available in the header, so derive it from the
    # instrument and the arm named in the filename:
    #   XShooter UVB: 0.300-0.5595 µm  --- optical
    #   XShooter VIS: 0.5595-1.024 µm  --- optical
    #   XShooter NIR: 1.024-2.480 µm   --- nir
    #   OSIRIS:       0.365-1.05 µm    --- optical
    #
    # Reset for every file: otherwise an unrecognised file would either
    # raise NameError (first file) or silently inherit the previous
    # file's regime.
    regime = None
    if instrument_ == "XShooter":
        if "nir" in filename_lower:
            regime = "nir"
        elif "vis" in filename_lower or "uvb" in filename_lower:
            regime = "optical"
    elif instrument_ == "OSIRIS":
        regime = "optical"

    if regime is None:
        print(f"Warning: could not determine regime for {filename}")

    # Some date formats are not standard; fall back to None (best effort)
    # when astropy cannot parse the header value or it is absent.
    try:
        obs_date = Time(obs_date_).to_datetime().isoformat()
    except Exception:
        obs_date = None

    # Prepare the row for the csv file
    csv_rows.append({
        "source": sourceName,
        "access_url": file_path,
        "regime": regime,
        "telescope": telescope_,
        "instrument": instrument_,
        "mode": mode_,
        "observation_date": obs_date,
        "comments": None,
        "reference": "Zhan18.1352",
    })

# Column order of the output CSV; matches the keys of each row above.
fieldnames = [
    "source",
    "access_url",
    "regime",
    "telescope",
    "instrument",
    "mode",
    "observation_date",
    "comments",
    "reference",
]

# Create csv file. The explicit fieldnames list means an empty directory
# produces a header-only CSV instead of an IndexError on csv_rows[0].
with open(csv_file, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(csv_rows)

print(f"CSV file '{csv_file}' created with {len(csv_rows)} entries.")


92 changes: 92 additions & 0 deletions scripts/ingests/Zhang18/ingest_Spectra_zhang18.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import logging
from astrodb_utils import load_astrodb, AstroDBError
from simple import REFERENCE_TABLES
from simple.utils.spectra import ingest_spectrum
from astropy.io import ascii
import sys

# Run configuration: recreate_db is forwarded to load_astrodb below, and
# save_db decides whether the database is written out at the end of the run.
recreate_db = True
save_db = False

# CSV listing the spectra to ingest (relative to the working directory).
path = "scripts/ingests/Zhang18/zhang18_spectra.csv"

# Show INFO-level messages from astrodb_utils itself.
astrodb_utils_logger = logging.getLogger("astrodb_utils")
astrodb_utils_logger.setLevel(logging.INFO)

# Dedicated child logger for this ingest (reference: Zhan18.1352).
logger = logging.getLogger("astrodb_utils.zhang18")
logger.setLevel(logging.INFO)

# Load Database
db = load_astrodb(
    "SIMPLE.sqlite",
    recreatedb=recreate_db,
    reference_tables=REFERENCE_TABLES,
    felis_schema="simple/schema.yaml",
)

# Make the current working directory importable.
sys.path.append(".")

def ingest_zhang_spectra():
    """Ingest every spectrum listed in the Zhang18 CSV into the database.

    Reads the CSV at the module-level ``path`` and calls ``ingest_spectrum``
    once per row against the module-level ``db``. Failures never abort the
    run; each row ends up in exactly one of three tallies, which are logged
    at the end:

    * added        -- ``ingest_spectrum`` returned without raising
    * skipped      -- the spectrum already exists, or an unexpected
                      (non-AstroDBError) exception was raised
    * inaccessible -- any other ``AstroDBError``
    """
    # Local import: numpy is only needed here to detect masked table cells.
    import numpy as np

    def _value_or_none(value):
        """Return None for a masked (blank) CSV cell, else the value itself."""
        return None if np.ma.is_masked(value) else value

    spec_table = ascii.read(
        path,
        format="csv",
        data_start=1,
        header_start=0,
        guess=False,
        fast_reader=False,
        delimiter=",",
    )
    spectra_added = 0
    skipped = 0
    inaccessible = 0

    for row in spec_table:
        sourceName = row["source"]
        file_path = row["access_url"]
        logger.info(f"Processing {sourceName}")

        try:
            # astropy reads blank CSV cells as masked values; hand None to
            # ingest_spectrum instead of the masked constant.
            ingest_spectrum(
                db,
                source=sourceName,
                access_url=file_path,
                regime=_value_or_none(row["regime"]),
                telescope=_value_or_none(row["telescope"]),
                instrument=_value_or_none(row["instrument"]),
                mode=_value_or_none(row["mode"]),
                obs_date=_value_or_none(row["observation_date"]),
                comments=_value_or_none(row["comments"]),
                reference="Zhan18.1352",
            )
            spectra_added += 1
            logger.info(f"Successfully added spectrum for {sourceName}.")

        except AstroDBError as e:
            if "Spectrum already exists" in str(e):
                logger.warning(f"Skipping {sourceName}: Spectrum already exists.")
                skipped += 1
            else:
                inaccessible += 1
                logger.warning(f"Inaccessible data for {sourceName}: {e}")
        except Exception as e:
            # Best-effort batch: keep going, but report the real cause rather
            # than assuming the failure was a bad URL.
            skipped += 1
            logger.warning(
                f"Skipping {sourceName} ({file_path}) due to unexpected error: "
                f"{type(e).__name__}: {e}"
            )

    logger.info(f"Total spectra added: {spectra_added}")
    logger.info(f"Total spectra skipped: {skipped}")
    logger.info(f"Total inaccessible spectra: {inaccessible}")

# Run the ingest against the database loaded above.
ingest_zhang_spectra()

# Write the database out to data/ only when saving has been enabled.
if save_db:
    db.save_database(directory="data/")



Loading