Participants will likely report that the dominant period from naive Fourier analysis is very long (tens to hundreds of days) rather than the expected 1-day period. This is the pedagogical moment.
The issue: Raw power spectra of time series data typically show "red noise" — power increasing toward low frequencies. The longest periods dominate simply because there's more total variance at low frequencies, not because they represent real periodic signals.
The solution: Estimate the local background power as a function of frequency (using a smoothing filter), then look for peaks that stand significantly above this background. This reveals the true periodic signals: the 1-day diurnal cycle and its harmonics.
Teaching point: Always ask "are we asking the right question?" Before accepting computational results, think about whether the method matches the scientific question. Here, we want periodic signals, not just raw power.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import timedelta
from astropy.coordinates import EarthLocation, AltAz, get_sun
from astropy.time import Time
import astropy.units as u
# Load data
df = pd.read_csv('rubin_mirror_temps.csv', parse_dates=['timestamp'])
# Full time series
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df['timestamp'], df['temperature'], linewidth=0.5)
ax.set_xlabel('Date (UTC)')
ax.set_ylabel('Temperature (°C)')
ax.set_title('Rubin Observatory Primary Mirror Temperature')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('temperature_timeseries.png', dpi=150)
# Histogram
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(df['temperature'], bins=60, edgecolor='black', linewidth=0.5)
ax.set_xlabel('Temperature (°C)')
ax.set_ylabel('Count')
ax.set_title('Temperature Distribution')
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('temperature_histogram.png', dpi=150)
# Last week with sunset markers
location = EarthLocation(lat=-30.24*u.deg, lon=-70.74*u.deg, height=2715*u.m)
last_time = df['timestamp'].max()
week_start = last_time - timedelta(days=7)
week = df[df['timestamp'] >= week_start].copy()
unique_dates = week['timestamp'].dt.date.unique()
sunset_times = []
for date in unique_dates:
t_start = pd.Timestamp(date).replace(hour=21, minute=0)
times_utc = [t_start + timedelta(minutes=m) for m in range(300)]
astropy_times = Time(times_utc)
altaz_frame = AltAz(obstime=astropy_times, location=location)
sun_altitudes = get_sun(astropy_times).transform_to(altaz_frame).alt.deg
for i in range(len(sun_altitudes) - 1):
if sun_altitudes[i] > 0 and sun_altitudes[i + 1] <= 0:
frac = sun_altitudes[i] / (sun_altitudes[i] - sun_altitudes[i + 1])
sunset_utc = times_utc[i] + frac * timedelta(minutes=1)
sunset_times.append(sunset_utc)
break
sunset_temps = []
for st in sunset_times:
idx = (week['timestamp'] - st).abs().idxmin()
sunset_temps.append(week.loc[idx, 'temperature'])
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(week['timestamp'], week['temperature'], linewidth=0.8)
ax.plot(sunset_times, sunset_temps, 'ro', markersize=10, label='Sunset')
ax.set_xlabel('Date (UTC)')
ax.set_ylabel('Temperature (°C)')
ax.set_title('Last Week with Sunset Markers')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('week_with_sunsets.png', dpi=150)
plt.show()
print(f"Max temp: {df['temperature'].max():.2f} °C at {df.loc[df['temperature'].idxmax(), 'timestamp']}")
print(f"Min temp: {df['temperature'].min():.2f} °C at {df.loc[df['temperature'].idxmin(), 'timestamp']}")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import uniform_filter1d
from scipy.signal import find_peaks
# Load data
df = pd.read_csv('rubin_mirror_temps.csv', parse_dates=['timestamp'])
temp = df['temperature'].values
N = len(temp)
dt_hrs = 15.0 / 60.0 # 15 minutes in hours
# Remove mean
temp_centered = temp - np.mean(temp)
# Compute FFT
fft_vals = np.fft.rfft(temp_centered)
power = np.abs(fft_vals) ** 2
freqs = np.fft.rfftfreq(N, d=dt_hrs)
# Convert to periods in days
with np.errstate(divide='ignore'):
periods_days = 1.0 / (freqs * 24.0)
# Skip DC component
freqs = freqs[1:]
power = power[1:]
periods_days = periods_days[1:]
# Estimate local background using smoothing
log_power = np.log10(power + 1e-30)
background = uniform_filter1d(log_power, size=201, mode='nearest')
background_linear = 10.0 ** background
# Signal-to-background ratio
snr = power / background_linear
# Find peaks above background
peak_indices, _ = find_peaks(snr, height=2.0, distance=5)
sorted_peaks = peak_indices[np.argsort(snr[peak_indices])[::-1]]
# Plot
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
# Power spectrum with background
mask = periods_days > 0.05
ax1.loglog(periods_days[mask], power[mask], linewidth=0.5, alpha=0.7, label='Power')
ax1.loglog(periods_days[mask], background_linear[mask], 'r-', linewidth=1.5, label='Background')
ax1.axvline(x=1.0, color='green', linestyle='--', alpha=0.5, label='1 day')
ax1.set_xlabel('Period (days)')
ax1.set_ylabel('Power')
ax1.set_title('Power Spectrum with Local Background')
ax1.legend()
ax1.grid(True, alpha=0.3)
# SNR
ax2.semilogx(periods_days[mask], snr[mask], linewidth=0.5)
ax2.axhline(y=1.0, color='gray', linestyle='--', alpha=0.5)
ax2.axvline(x=1.0, color='green', linestyle='--', alpha=0.5)
ax2.set_xlabel('Period (days)')
ax2.set_ylabel('Power / Background')
ax2.set_title('Signals Above Background')
ax2.grid(True, alpha=0.3)
# Annotate top peaks
for idx in sorted_peaks[:5]:
if periods_days[idx] > 0.05:
ax2.annotate(f'{periods_days[idx]:.2f} d',
xy=(periods_days[idx], snr[idx]),
xytext=(0, 10), textcoords='offset points',
ha='center', fontsize=9, color='red',
arrowprops=dict(arrowstyle='->', color='red', lw=0.8))
plt.tight_layout()
plt.savefig('fourier_corrected.png', dpi=150)
plt.show()
print("Top 5 periods above background:")
for i, idx in enumerate(sorted_peaks[:5]):
print(f"{i+1}. Period: {periods_days[idx]:.3f} days, SNR: {snr[idx]:.2f}")
Full reference implementations are available in the course repository:
| Method | MAE (°C) | RMSE (°C) | R² |
|---|---|---|---|
| 24-Hour Mean Baseline | 2.03 | 2.68 | 0.69 |
| Linear Trend (6h) | 3.88 | 4.19 | 0.23 |
| Fourier Series (4 days) | 3.60 | 4.67 | 0.04 |
| XGBoost + Engineered Features | 1.14 | 1.51 | 0.90 |
| NBEATSx-Ridge | 0.89 | 1.12 | 0.94 |
lab_notebook.html (Steps 1-9)
and ml_summary.html (Steps 10-11)u.deg units. Participants may forget this and get cryptic errors.plt.show() can cause
problems in some environments. Recommend saving plots first with
plt.savefig() then displaying.xgboost, astropy, scipy. Claude
Code will typically suggest this automatically.The exercise can be shortened by skipping the ML comparison (Steps 10-11) or by providing more specific prompts to reduce exploration time.