import pandas as pd
from IPython.display import Markdown
# Import data
df_swe_and_temp = pd.read_parquet('data/nrcs_swe_pivot.parquet')
df_soil_moisture = pd.read_parquet('data/nrcs_soil_moisture_pivot.parquet')
df_streamflow = pd.read_parquet('data/usgs_colorado_river_glenwood_springs.parquet')
df_swe_and_temp.index = pd.to_datetime(df_swe_and_temp.index)
df_soil_moisture.index = pd.to_datetime(df_soil_moisture.index)
df_streamflow.index = pd.to_datetime(df_streamflow.index)
dfs = {
'Snow Water Equivalent / Temperature': df_swe_and_temp,
'Soil Moisture': df_soil_moisture,
'Streamflow': df_streamflow
}
# Dislay metadata
for name, df in dfs.items():
header = f"### {name}"
rows = f"**Rows:** {df.shape[0]:,}"
unique_dates = f"**Unique dates:** {df.index.nunique():,}"
uniqueness = f"**Date uniqueness:** {df.shape[0] / df.index.nunique():.2f} rows per date"
columns = f"**Column names:** {', '.join(df.columns)}"
date_range = f"""**Date range:**
{df.index.min().strftime('%Y-%m-%d')} to
{df.index.max().strftime('%Y-%m-%d')}"""
markdown_content = f"{header}\n\n{rows}\n\n{unique_dates}\n\n{uniqueness}\n\n{columns}\n\n{date_range}"
display(Markdown(markdown_content))
Snow Water Equivalent / Temperature¶
Rows: 6,492
Unique dates: 6,492
Date uniqueness: 1.00 rows per date
Column names: SNWD_BisonLake, SNWD_McClurePass, WTEQ_BisonLake, WTEQ_McClurePass, PREC_BisonLake, PREC_McClurePass, PRCP_BisonLake, PRCP_McClurePass, TAVG_BisonLake, TAVG_McClurePass, TMAX_BisonLake, TMAX_McClurePass, TMIN_BisonLake, TMIN_McClurePass
Date range: 2003-06-25 to 2024-09-03
Soil Moisture¶
Rows: 3,503
Unique dates: 3,503
Date uniqueness: 1.00 rows per date
Column names: soilmoisture_station378_2ft, soilmoisture_station378_8ft, soilmoisture_station378_20ft, soilmoisture_station457_2ft, soilmoisture_station457_8ft, soilmoisture_station457_20ft, soilmoisture_station607_4ft, soilmoisture_station607_8ft, soilmoisture_station607_20ft, soilmoisture_station680_2ft, soilmoisture_station680_8ft, soilmoisture_station680_20ft, soilmoisture_station802_2ft, soilmoisture_station802_8ft, soilmoisture_station802_20ft
Date range: 2008-03-12 to 2021-07-27
Streamflow¶
Rows: 8,792
Unique dates: 8,792
Date uniqueness: 1.00 rows per date
Column names: streamflow
Date range: 2000-01-01 to 2024-01-26
df_joined = pd.concat(dfs, axis=1, join='inner')
df_joined = df_joined.droplevel(0, axis=1)
display(df_joined)
SNWD_BisonLake | SNWD_McClurePass | WTEQ_BisonLake | WTEQ_McClurePass | PREC_BisonLake | PREC_McClurePass | PRCP_BisonLake | PRCP_McClurePass | TAVG_BisonLake | TAVG_McClurePass | ... | soilmoisture_station607_4ft | soilmoisture_station607_8ft | soilmoisture_station607_20ft | soilmoisture_station680_2ft | soilmoisture_station680_8ft | soilmoisture_station680_20ft | soilmoisture_station802_2ft | soilmoisture_station802_8ft | soilmoisture_station802_20ft | streamflow | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
date | |||||||||||||||||||||
2008-03-12 | 82.0 | 60.0 | 28.6 | 23.9 | 29.1 | 22.9 | 0.1 | 0.0 | 19.4 | 30.2 | ... | 10.2 | 24.2 | 25.8 | 14.6 | 11.7 | 26.0 | 4.0 | 1.3 | 6.5 | 2360.0 |
2008-03-15 | 86.0 | 63.0 | 29.2 | 24.5 | 29.7 | 23.4 | 0.1 | 0.2 | 11.7 | 23.4 | ... | 10.3 | 24.4 | 25.4 | 14.7 | 11.7 | 26.2 | 4.0 | 1.4 | 6.9 | 2260.0 |
2008-03-17 | 84.0 | 63.0 | 29.3 | 24.8 | 29.8 | 23.6 | 0.0 | 0.2 | 14.4 | 24.3 | ... | 10.3 | 24.7 | 25.2 | 14.4 | 12.1 | 26.1 | 4.1 | 1.4 | 6.7 | 2260.0 |
2008-03-18 | 83.0 | 62.0 | 29.3 | 24.8 | 29.8 | 23.6 | 0.0 | 0.0 | 11.8 | 23.9 | ... | 10.6 | 24.9 | 25.3 | 14.7 | 12.1 | 26.0 | 4.0 | 1.4 | 6.6 | 2260.0 |
2008-03-19 | 82.0 | 61.0 | 29.4 | 24.9 | 29.8 | 23.6 | 0.0 | 0.0 | 20.8 | 30.2 | ... | 10.6 | 25.0 | 25.1 | 14.6 | 11.7 | 25.9 | 4.0 | 1.6 | 6.9 | 2200.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-07-23 | 0.0 | 0.0 | 0.0 | 0.0 | 27.7 | 20.7 | 0.2 | 0.2 | 52.9 | 62.1 | ... | 13.8 | 14.5 | 11.5 | 35.5 | 14.2 | 30.2 | 24.1 | 5.5 | 9.2 | 1170.0 |
2021-07-24 | 0.0 | 0.0 | 0.0 | 0.0 | 27.7 | 21.1 | 0.0 | 0.4 | 54.7 | 57.0 | ... | 13.4 | 14.1 | 11.3 | 33.7 | 13.9 | 30.2 | 24.0 | 5.3 | 8.7 | 1240.0 |
2021-07-25 | 0.0 | 0.0 | 0.0 | 0.0 | 27.8 | 21.5 | 0.1 | 0.4 | 52.7 | 57.6 | ... | 13.1 | 13.9 | 10.8 | 39.8 | 13.8 | 30.2 | 23.8 | 5.0 | 8.1 | 1190.0 |
2021-07-26 | 0.0 | 0.0 | 0.0 | 0.0 | 27.8 | 21.5 | 0.0 | 0.0 | 57.0 | 61.2 | ... | 12.9 | 13.9 | 10.4 | 37.7 | 14.1 | 30.2 | 23.7 | 4.8 | 7.0 | 1170.0 |
2021-07-27 | 0.0 | 0.0 | 0.0 | 0.0 | 27.8 | 21.5 | 0.0 | 0.0 | 58.5 | 64.8 | ... | 12.4 | 13.6 | 10.1 | 34.0 | 13.9 | 30.1 | 23.9 | 4.4 | 6.1 | 1110.0 |
2996 rows × 30 columns
df_joined.to_parquet('data/streamflow_prediction_dataset.parquet')