In [1]:
# Precipitation - inches (accumulation)  
# Temperature - Fahrenheit  
# Snow - Binary (0 - no snow, 1 - snow)
# Streamflow - ft³/s
In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import os
import random
import tensorflow as tf
import joblib

from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from keras.callbacks import EarlyStopping
from keras.optimizers.legacy import Adam

import os
import numpy as np
import joblib
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import random

# Set global random seed for reproducibility
RANDOM_STATE = 42
os.environ['PYTHONHASHSEED'] = str(RANDOM_STATE)
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

# Option to re-run the model, otherwise load the model from /models
RUN_MODEL = True

# Set global random seed for reproducibility
RANDOM_STATE = 42
os.environ['PYTHONHASHSEED'] = str(RANDOM_STATE)
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

# Option to re-run the model, otherwise load the model from /models
RUN_MODEL = True

Gather Data¶

In [3]:
df = pd.read_csv('data/datasets/snow_soilmoisture_prediction_dataset.csv', index_col=0, parse_dates=True)
display(df)

# feature engineering
rolling_window = 7
df = df.asfreq('D').interpolate()
df['prec_year'] = df['PREC_Avg'].groupby(df.index.year).cumsum()
df['prec_rolling'] = df['PREC_Avg'].rolling(window=rolling_window).mean()
df['temp_rolling'] = df['TAVG_Avg'].rolling(window=rolling_window).mean()
df['soil_rolling'] = df['soilmoisture_Avg_8ft'].rolling(window=rolling_window).mean()
df['dayofyear'] = df.index.dayofyear
df['sin_dayofyear'] = np.sin(2*np.pi*df['dayofyear']/365)
df['cos_dayofyear'] = np.cos(2*np.pi*df['dayofyear']/365)

df = df.dropna()
display(df)

for feature in df.columns:
    plt.figure(figsize=(10, 6))
    sns.lineplot(x=df.index, y=feature, data=df)
    plt.title(feature)
    plt.show()
PREC_Avg TAVG_Avg soilmoisture_Avg_8ft Snow streamflow
date
2008-03-12 26.00 24.80 17.74 1 2360.0
2008-03-15 26.55 17.55 17.88 1 2260.0
2008-03-17 26.70 19.35 18.04 1 2260.0
2008-03-18 26.70 17.85 18.06 1 2260.0
2008-03-19 26.70 25.50 18.06 1 2200.0
... ... ... ... ... ...
2021-07-23 24.20 57.50 14.60 0 1170.0
2021-07-24 24.40 55.85 14.38 0 1240.0
2021-07-25 24.65 55.15 14.24 0 1190.0
2021-07-26 24.65 59.10 14.26 0 1170.0
2021-07-27 24.65 61.65 14.08 0 1110.0

2996 rows × 5 columns

PREC_Avg TAVG_Avg soilmoisture_Avg_8ft Snow streamflow prec_year prec_rolling temp_rolling soil_rolling dayofyear sin_dayofyear cos_dayofyear
date
2008-03-18 26.70 17.85 18.06 1.0 2260.0 185.125 26.446429 20.050000 17.900000 78 0.974100 0.226116
2008-03-19 26.70 25.50 18.06 1.0 2200.0 211.825 26.546429 20.150000 17.945714 79 0.977848 0.209315
2008-03-20 26.70 26.60 18.10 1.0 1930.0 238.525 26.620238 20.752381 17.990476 80 0.981306 0.192452
2008-03-21 26.75 24.00 18.14 1.0 1780.0 265.275 26.675000 21.328571 18.034286 81 0.984474 0.175531
2008-03-22 26.75 17.75 18.04 1.0 1740.0 292.025 26.703571 21.357143 18.057143 82 0.987349 0.158559
... ... ... ... ... ... ... ... ... ... ... ... ...
2021-07-23 24.20 57.50 14.60 0.0 1170.0 3572.500 23.735714 60.271429 14.680000 204 -0.361714 -0.932289
2021-07-24 24.40 55.85 14.38 0.0 1240.0 3596.900 23.857143 59.914286 14.582857 205 -0.377708 -0.925925
2021-07-25 24.65 55.15 14.24 0.0 1190.0 3621.550 24.014286 58.785714 14.482857 206 -0.393590 -0.919286
2021-07-26 24.65 59.10 14.26 0.0 1170.0 3646.200 24.171429 58.057143 14.428571 207 -0.409356 -0.912375
2021-07-27 24.65 61.65 14.08 0.0 1110.0 3670.850 24.321429 58.078571 14.365714 208 -0.425000 -0.905193

4880 rows × 12 columns

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

LSTM¶

In [4]:
def define_lstm_model(input_shape):
    """Define an LSTM model."""
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mse')
    return model

def plot_loss(history, split_index):
    """Plot training and validation loss."""
    plt.figure(figsize=(8, 4))
    plt.plot(history.history['loss'], label='Training Loss', color='blue')
    plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
    plt.title(f'Training and Validation Loss (Split {split_index})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

def main():
    # Ensure the 'models' folder exists
    if not os.path.exists('models'):
        os.makedirs('models')

    # Pre-scale the entire dataset globally
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X_raw = df.drop(columns=['streamflow'])  # Input features
    y_raw = df['streamflow']  # Target variable

    # Fit scalers on the entire dataset
    X_scaled = scaler_X.fit_transform(X_raw)
    y_scaled = scaler_y.fit_transform(y_raw.values.reshape(-1, 1))

    # Save the global scalers for later use
    joblib.dump(scaler_X, 'models/scaler_X_global.pkl')
    joblib.dump(scaler_y, 'models/scaler_y_global.pkl')
    print("Global scalers saved successfully.")

    # Early stopping configuration
    early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

    # Time Series Split Configuration
    tscv = TimeSeriesSplit(n_splits=5)

    # Store metrics and predictions across all splits
    rmse_list, mae_list = [], []
    all_preds, all_actuals = [], []

    # Loop through each split in TimeSeriesSplit
    for split_index, (train_index, test_index) in enumerate(tscv.split(X_scaled), start=1):
        print(f"Training on Split {split_index}...")

        # Train-test split using pre-scaled data
        X_train, X_test = X_scaled[train_index], X_scaled[test_index]
        y_train, y_test = y_scaled[train_index], y_scaled[test_index]

        # Reshape for LSTM input
        X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
        X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

        # Define and train LSTM model
        model = define_lstm_model(input_shape=(X_train.shape[1], 1))
        history = model.fit(
            X_train, y_train, epochs=100, verbose=0, batch_size=32,
            validation_data=(X_test, y_test), callbacks=[early_stopping]
        )

        # Predict and store results
        y_pred = model.predict(X_test)
        y_pred_rescaled = scaler_y.inverse_transform(y_pred)
        y_test_rescaled = scaler_y.inverse_transform(y_test)

        all_preds.extend(y_pred_rescaled.flatten())
        all_actuals.extend(y_test_rescaled.flatten())

        # Calculate metrics
        rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))
        mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
        rmse_list.append(rmse)
        mae_list.append(mae)
        print(f"Split {split_index} - RMSE: {rmse:.4f}, MAE: {mae:.4f}")

        # Plot training and validation loss
        plot_loss(history, split_index)

        # Save the model
        model.save(f"models/lstm_model_fold{split_index}.keras")
        print(f"Model for Split {split_index} saved successfully.")

    # Combine all predictions and actuals
    final_preds = np.array(all_preds)
    final_actuals = np.array(all_actuals)

    # Calculate overall metrics
    avg_rmse = np.mean(rmse_list)
    avg_mae = np.mean(mae_list)
    print("\nCross-Validation Ensemble Results:")
    print(f"Average RMSE: {avg_rmse:.4f}")
    print(f"Average MAE: {avg_mae:.4f}")
    
    # Averaged Predictions from All 5 Models
    print("\nGenerating Averaged Predictions Across All 5 Models...")

    # Reshape the entire dataset for LSTM input
    X_full = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

    # Placeholder for predictions from all models
    full_preds = []

    # Load all 5 trained models and predict on the entire dataset
    for i in range(1, 6):  # Assuming 5 splits
        print(f"Loading model fold {i}...")
        model = tf.keras.models.load_model(f"models/lstm_model_fold{i}.keras")

        # Predict on the full dataset
        y_pred = model.predict(X_full)
        y_pred_rescaled = scaler_y.inverse_transform(y_pred)

        full_preds.append(y_pred_rescaled.flatten())  # Store predictions

    # Average the predictions across all models
    averaged_preds = np.mean(full_preds, axis=0)

    # Add averaged predictions to the original DataFrame
    df['streamflow_pred'] = averaged_preds

    # Save the updated DataFrame with predictions
    df.to_csv('data/predictions/streamflow_predictions.csv', index=True)
    print("Predictions saved to 'data/predictions/streamflow_predictions.csv'.")

    # Plot actual vs averaged predictions
    plt.figure(figsize=(12, 6))
    plt.plot(df.index, df['streamflow'], label='Actual Streamflow', color='blue')
    plt.plot(df.index, df['streamflow_pred'], label='Averaged Predictions', color='red')
    plt.title('Averaged Streamflow Predictions Across All 5 Models')
    plt.xlabel('Time')
    plt.ylabel('Streamflow')
    plt.legend()
    plt.show()

    # Display the DataFrame with predictions
    print("\nDataFrame with Streamflow Predictions:")
    display(df)

if RUN_MODEL:
    main()
Global scalers saved successfully.
Training on Split 1...
26/26 [==============================] - 0s 692us/step
Split 1 - RMSE: 4160.8270, MAE: 2016.5814
No description has been provided for this image
Model for Split 1 saved successfully.
Training on Split 2...
26/26 [==============================] - 0s 692us/step
Split 2 - RMSE: 2626.9774, MAE: 1469.3910
No description has been provided for this image
Model for Split 2 saved successfully.
Training on Split 3...
26/26 [==============================] - 0s 679us/step
Split 3 - RMSE: 1136.7066, MAE: 786.0593
No description has been provided for this image
Model for Split 3 saved successfully.
Training on Split 4...
26/26 [==============================] - 0s 882us/step
Split 4 - RMSE: 624.5595, MAE: 409.4142
No description has been provided for this image
Model for Split 4 saved successfully.
Training on Split 5...
26/26 [==============================] - 0s 736us/step
Split 5 - RMSE: 1241.8549, MAE: 975.1915
No description has been provided for this image
Model for Split 5 saved successfully.

Cross-Validation Ensemble Results:
Average RMSE: 1958.1851
Average MAE: 1131.3275

Generating Averaged Predictions Across All 5 Models...
Loading model fold 1...
153/153 [==============================] - 0s 985us/step
Loading model fold 2...
153/153 [==============================] - 0s 605us/step
Loading model fold 3...
153/153 [==============================] - 0s 609us/step
Loading model fold 4...
153/153 [==============================] - 0s 625us/step
Loading model fold 5...
153/153 [==============================] - 0s 619us/step
Predictions saved to 'data/predictions/streamflow_predictions.csv'.
No description has been provided for this image
DataFrame with Streamflow Predictions:
PREC_Avg TAVG_Avg soilmoisture_Avg_8ft Snow streamflow prec_year prec_rolling temp_rolling soil_rolling dayofyear sin_dayofyear cos_dayofyear streamflow_pred
date
2008-03-18 26.70 17.85 18.06 1.0 2260.0 185.125 26.446429 20.050000 17.900000 78 0.974100 0.226116 2097.528809
2008-03-19 26.70 25.50 18.06 1.0 2200.0 211.825 26.546429 20.150000 17.945714 79 0.977848 0.209315 2069.220215
2008-03-20 26.70 26.60 18.10 1.0 1930.0 238.525 26.620238 20.752381 17.990476 80 0.981306 0.192452 2054.645752
2008-03-21 26.75 24.00 18.14 1.0 1780.0 265.275 26.675000 21.328571 18.034286 81 0.984474 0.175531 2046.221436
2008-03-22 26.75 17.75 18.04 1.0 1740.0 292.025 26.703571 21.357143 18.057143 82 0.987349 0.158559 2042.245361
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-07-23 24.20 57.50 14.60 0.0 1170.0 3572.500 23.735714 60.271429 14.680000 204 -0.361714 -0.932289 1458.045898
2021-07-24 24.40 55.85 14.38 0.0 1240.0 3596.900 23.857143 59.914286 14.582857 205 -0.377708 -0.925925 1484.145752
2021-07-25 24.65 55.15 14.24 0.0 1190.0 3621.550 24.014286 58.785714 14.482857 206 -0.393590 -0.919286 1500.855957
2021-07-26 24.65 59.10 14.26 0.0 1170.0 3646.200 24.171429 58.057143 14.428571 207 -0.409356 -0.912375 1463.360352
2021-07-27 24.65 61.65 14.08 0.0 1110.0 3670.850 24.321429 58.078571 14.365714 208 -0.425000 -0.905193 1441.485107

4880 rows × 13 columns

Predict with RCP Scenarios¶

In [5]:
df_rcp45 = pd.read_csv(
    'data/predictions/rcp45_snow_rf_soilmoisture_lstm_predictions.csv', index_col=0, parse_dates=True)
df_rcp85 = pd.read_csv(
    'data/predictions/rcp85_snow_rf_soilmoisture_lstm_predictions.csv', index_col=0, parse_dates=True)

# add same features
df_rcp45['prec_year'] = df_rcp45['PREC_Avg'].groupby(df_rcp45.index.year).cumsum()
df_rcp45['prec_rolling'] = df_rcp45['PREC_Avg'].rolling(window=30).mean()
df_rcp45['temp_rolling'] = df_rcp45['TAVG_Avg'].rolling(window=30).mean()
df_rcp45['soil_rolling'] = df_rcp45['soilmoisture_Avg_8ft'].rolling(window=30).mean()
df_rcp45['dayofyear'] = df_rcp45.index.dayofyear
df_rcp45['sin_dayofyear'] = np.sin(2*np.pi*df_rcp45['dayofyear']/365)
df_rcp45['cos_dayofyear'] = np.cos(2*np.pi*df_rcp45['dayofyear']/365)

df_rcp85['prec_year'] = df_rcp85['PREC_Avg'].groupby(df_rcp85.index.year).cumsum()
df_rcp85['prec_rolling'] = df_rcp85['PREC_Avg'].rolling(window=30).mean()
df_rcp85['temp_rolling'] = df_rcp85['TAVG_Avg'].rolling(window=30).mean()
df_rcp85['soil_rolling'] = df_rcp85['soilmoisture_Avg_8ft'].rolling(window=30).mean()
df_rcp85['dayofyear'] = df_rcp85.index.dayofyear
df_rcp85['sin_dayofyear'] = np.sin(2*np.pi*df_rcp85['dayofyear']/365)
df_rcp85['cos_dayofyear'] = np.cos(2*np.pi*df_rcp85['dayofyear']/365)

df_rcp45 = df_rcp45.sort_index().reindex(columns=df.columns)
df_rcp85 = df_rcp85.sort_index().reindex(columns=df.columns)
df_rcp45 = df_rcp45.dropna(subset=['prec_rolling', 'temp_rolling', 'soil_rolling'])
df_rcp85 = df_rcp85.dropna(subset=['prec_rolling', 'temp_rolling', 'soil_rolling'])
display(df_rcp45, df_rcp85)
PREC_Avg TAVG_Avg soilmoisture_Avg_8ft Snow streamflow prec_year prec_rolling temp_rolling soil_rolling dayofyear sin_dayofyear cos_dayofyear streamflow_pred
time
2006-01-30 12:00:00 2.592942 16.299992 17.351016 1 NaN 44.493768 1.483126 23.223791 14.436170 30 4.937756e-01 0.869589 NaN
2006-01-31 12:00:00 2.592942 27.272213 17.322869 1 NaN 47.086710 1.569400 23.096730 14.706932 31 5.086709e-01 0.860961 NaN
2006-02-01 12:00:00 2.593391 31.274729 17.074762 1 NaN 49.680101 1.645637 23.393285 14.969424 32 5.234156e-01 0.852078 NaN
2006-02-02 12:00:00 2.594664 24.935026 16.870588 1 NaN 52.274765 1.720172 23.573343 15.225111 33 5.380052e-01 0.842942 NaN
2006-02-03 12:00:00 2.601046 21.481874 16.697251 1 NaN 54.875811 1.794919 23.830469 15.475019 34 5.524353e-01 0.833556 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2100-12-27 12:00:00 27.937475 39.995155 25.947626 1 NaN 5896.129146 27.894015 36.885914 25.248856 361 -6.880243e-02 0.997630 NaN
2100-12-28 12:00:00 27.938652 35.275560 25.833564 1 NaN 5924.067798 27.896113 37.214156 25.241393 362 -5.161967e-02 0.998667 NaN
2100-12-29 12:00:00 27.938660 28.253563 25.831460 1 NaN 5952.006458 27.898210 37.133476 25.243655 363 -3.442161e-02 0.999407 NaN
2100-12-30 12:00:00 27.941078 38.389730 25.775737 1 NaN 5979.947536 27.900381 37.318801 25.251057 364 -1.721336e-02 0.999852 NaN
2100-12-31 12:00:00 28.007875 32.480030 25.720054 1 NaN 6007.955411 27.904470 37.332162 25.258877 365 6.432491e-16 1.000000 NaN

34669 rows × 13 columns

PREC_Avg TAVG_Avg soilmoisture_Avg_8ft Snow streamflow prec_year prec_rolling temp_rolling soil_rolling dayofyear sin_dayofyear cos_dayofyear streamflow_pred
time
2006-01-30 12:00:00 2.239401 28.573797 15.959198 1 NaN 42.871709 1.429057 21.981437 13.816520 30 4.937756e-01 0.869589 NaN
2006-01-31 12:00:00 2.239407 32.932182 16.151044 1 NaN 45.111116 1.503557 22.042566 14.048222 31 5.086709e-01 0.860961 NaN
2006-02-01 12:00:00 2.247728 28.678800 16.308308 1 NaN 47.358844 1.568231 22.252132 14.285165 32 5.234156e-01 0.852078 NaN
2006-02-02 12:00:00 2.247747 30.933344 16.383022 1 NaN 49.606591 1.631189 22.631842 14.524599 33 5.380052e-01 0.842942 NaN
2006-02-03 12:00:00 2.248361 39.262460 16.583585 1 NaN 51.854952 1.694167 23.481412 14.770719 34 5.524353e-01 0.833556 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2100-12-27 12:00:00 32.626354 40.788258 25.361338 1 NaN 6348.835813 32.161526 38.291567 25.926905 361 -6.880243e-02 0.997630 NaN
2100-12-28 12:00:00 32.643470 40.686527 25.366339 1 NaN 6381.479283 32.193288 38.194772 25.899159 362 -5.161967e-02 0.998667 NaN
2100-12-29 12:00:00 32.649437 37.635570 25.386576 1 NaN 6414.128720 32.225226 37.931623 25.872653 363 -3.442161e-02 0.999407 NaN
2100-12-30 12:00:00 32.702927 39.204860 25.310180 1 NaN 6446.831647 32.258801 37.714852 25.874019 364 -1.721336e-02 0.999852 NaN
2100-12-31 12:00:00 32.736645 37.206974 26.476085 1 NaN 6479.568292 32.291749 37.494622 25.911054 365 6.432491e-16 1.000000 NaN

34669 rows × 13 columns

In [6]:
from keras.models import load_model

# Load global scalers
scaler_X = joblib.load('models/scaler_X_global.pkl')
scaler_y = joblib.load('models/scaler_y_global.pkl')

# Scale RCP datasets using the global input scaler
try:
    df_rcp45_scaled = scaler_X.transform(df_rcp45.drop(columns=['streamflow', 'streamflow_pred']))
    df_rcp85_scaled = scaler_X.transform(df_rcp85.drop(columns=['streamflow', 'streamflow_pred']))
except:
    df_rcp45_scaled = scaler_X.transform(df_rcp45.drop(columns=['streamflow']))
    df_rcp85_scaled = scaler_X.transform(df_rcp85.drop(columns=['streamflow']))  

# Placeholder for RCP predictions
rcp45_preds = []
rcp85_preds = []

# Load models and predict
for i in range(1, 6):  # Assuming 5 splits
    print(f"Predicting with model fold {i}...")

    # Load the trained model
    model = load_model(f"models/lstm_model_fold{i}.keras")

    # Reshape RCP data for LSTM input
    X_rcp45 = df_rcp45_scaled.reshape((df_rcp45_scaled.shape[0], df_rcp45_scaled.shape[1], 1))
    X_rcp85 = df_rcp85_scaled.reshape((df_rcp85_scaled.shape[0], df_rcp85_scaled.shape[1], 1))

    # Predict and inverse transform
    rcp45_pred = model.predict(X_rcp45)
    rcp85_pred = model.predict(X_rcp85)

    rcp45_preds.append(scaler_y.inverse_transform(rcp45_pred).flatten())
    rcp85_preds.append(scaler_y.inverse_transform(rcp85_pred).flatten())

# Average Predictions Across All Models
rcp45_streamflow_pred_rescaled = np.mean(rcp45_preds, axis=0)
rcp85_streamflow_pred_rescaled = np.mean(rcp85_preds, axis=0)
Predicting with model fold 1...
1084/1084 [==============================] - 1s 586us/step
1084/1084 [==============================] - 1s 569us/step
Predicting with model fold 2...
1084/1084 [==============================] - 1s 635us/step
1084/1084 [==============================] - 1s 578us/step
Predicting with model fold 3...
1084/1084 [==============================] - 1s 650us/step
1084/1084 [==============================] - 1s 571us/step
Predicting with model fold 4...
1084/1084 [==============================] - 1s 619us/step
1084/1084 [==============================] - 1s 552us/step
Predicting with model fold 5...
1084/1084 [==============================] - 1s 554us/step
1084/1084 [==============================] - 1s 569us/step
In [7]:
# final dataset
df_rcp45['streamflow'] = rcp45_streamflow_pred_rescaled
df_rcp85['streamflow'] = rcp85_streamflow_pred_rescaled
df_rcp45.to_csv('data/predictions/rcp45_streamflow_listm_predictions.csv')
df_rcp85.to_csv('data/predictions/rcp85_streamflow_listm_predictions.csv')
In [8]:
# Ensure x-axis is a flat numeric array
x_years_rcp45 = df_rcp45.index.year.to_numpy()
x_years_rcp85 = df_rcp85.index.year.to_numpy()

# Calculate trendline for RCP 4.5
slope_rcp45, intercept_rcp45 = np.polyfit(
    x_years_rcp45, rcp45_streamflow_pred_rescaled, 1)
trendline_label_rcp45 = f'Trendline (y={slope_rcp45:.2f}x + {intercept_rcp45:.2f})'

# Plot Results for RCP 4.5
plt.figure(figsize=(12, 6))
sns.lineplot(x=x_years_rcp45, y=rcp45_streamflow_pred_rescaled,
             label='RCP 4.5 Ensemble Prediction', color='blue')
sns.regplot(x=x_years_rcp45, y=rcp45_streamflow_pred_rescaled, scatter=False,
            color='blue', label=trendline_label_rcp45, line_kws={'alpha': 0.2, 'linestyle': '--'}, ci=None)
plt.title('RCP 4.5 - Streamflow Predictions (Ensemble)')
plt.xlabel('Year')
plt.ylabel('Streamflow')
# plt.ylim(2300, 2900)
plt.legend()
plt.show()

# Calculate trendline for RCP 8.5
slope_rcp85, intercept_rcp85 = np.polyfit(
    x_years_rcp85, rcp85_streamflow_pred_rescaled, 1)
trendline_label_rcp85 = f'Trendline (y={slope_rcp85:.2f}x + {intercept_rcp85:.2f})'

# Plot Results for RCP 8.5
plt.figure(figsize=(12, 6))
sns.lineplot(x=x_years_rcp85, y=rcp85_streamflow_pred_rescaled,
             label='RCP 8.5 Ensemble Prediction', color='red')
sns.regplot(x=x_years_rcp85, y=rcp85_streamflow_pred_rescaled, scatter=False,
            color='red', label=trendline_label_rcp85, line_kws={'alpha': 0.2, 'linestyle': '--'}, ci=None)
plt.title('RCP 8.5 - Streamflow Predictions (Ensemble)')
plt.xlabel('Year')
plt.ylabel('Streamflow')
# plt.ylim(2300, 2900)
plt.legend()
plt.show()

# Plot Results for Both RCPs
plt.figure(figsize=(12, 6))
sns.lineplot(x=x_years_rcp45, y=rcp45_streamflow_pred_rescaled,
             label='RCP 4.5 Ensemble Prediction', color='blue', errorbar=None)
sns.regplot(x=x_years_rcp45, y=rcp45_streamflow_pred_rescaled, scatter=False,
            color='blue', line_kws={'alpha': 0.2, 'linestyle': '--'}, ci=None)
sns.lineplot(x=x_years_rcp85, y=rcp85_streamflow_pred_rescaled,
                label='RCP 8.5 Ensemble Prediction', color='red', errorbar=None)
sns.regplot(x=x_years_rcp85, y=rcp85_streamflow_pred_rescaled, scatter=False,
            color='red', line_kws={'alpha': 0.2, 'linestyle': '--'}, ci=None)
plt.title('RCP 4.5 vs RCP 8.5 - Streamflow Predictions (Ensemble)')
plt.xlabel('Year')
plt.ylabel('Streamflow')
# plt.ylim(2300, 2900)
plt.legend()
plt.show()

# Aggregate precipitation and temperature to yearly values
df_rcp45_yearly = df_rcp45.resample('Y').agg({
    'PREC_Avg': 'sum',
    'TAVG_Avg': 'mean',
    'streamflow': 'mean'
})

df_rcp85_yearly = df_rcp85.resample('Y').agg({
    'PREC_Avg': 'sum',
    'TAVG_Avg': 'mean',
    'streamflow': 'mean'
})

# Plot yearly aggregated precipitation and temperature for RCP 4.5
fig, ax1 = plt.subplots(figsize=(12, 6))

color = 'tab:blue'
ax1.set_xlabel('Year')
ax1.set_ylabel('Streamflow', color=color)
ax1.plot(df_rcp45_yearly.index.year, df_rcp45_yearly['streamflow'], label='Streamflow Prediction (RCP 4.5)', color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()
color = 'tab:red'
ax2.set_ylabel('Average Temperature', color=color)
ax2.plot(df_rcp45_yearly.index.year, df_rcp45_yearly['TAVG_Avg'], label='Average Temperature (RCP 4.5)', color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()
plt.title('Yearly Aggregated Precipitation and Temperature (RCP 4.5)')
fig.legend(loc='upper left', bbox_to_anchor=(0.1, 0.9))
plt.show()

# Plot yearly aggregated precipitation and temperature for RCP 8.5
fig, ax1 = plt.subplots(figsize=(12, 6))

color = 'tab:blue'
ax1.set_xlabel('Year')
ax1.set_ylabel('Streamflow', color=color)
ax1.plot(df_rcp85_yearly.index.year, df_rcp85_yearly['streamflow'], label='Streamflow Prediction (RCP 8.5)', color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()
color = 'tab:red'
ax2.set_ylabel('Average Temperature', color=color)
ax2.plot(df_rcp85_yearly.index.year, df_rcp85_yearly['TAVG_Avg'], label='Average Temperature (RCP 8.5)', color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()
plt.title('Yearly Aggregated Precipitation and Temperature (RCP 8.5)')
fig.legend(loc='upper left', bbox_to_anchor=(0.1, 0.9))
plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
/var/folders/cj/3zyd4jkj3s1f2bnvnnc629300000gn/T/ipykernel_84198/919624553.py:59: FutureWarning: 'Y' is deprecated and will be removed in a future version, please use 'YE' instead.
  df_rcp45_yearly = df_rcp45.resample('Y').agg({
/var/folders/cj/3zyd4jkj3s1f2bnvnnc629300000gn/T/ipykernel_84198/919624553.py:65: FutureWarning: 'Y' is deprecated and will be removed in a future version, please use 'YE' instead.
  df_rcp85_yearly = df_rcp85.resample('Y').agg({
No description has been provided for this image
No description has been provided for this image
In [9]:
import matplotlib.colors as mcolors

# Extract year and month from the index
df_rcp45['Year'] = df_rcp45.index.year
df_rcp45['Month'] = df_rcp45.index.month

# Group by year and month, then calculate the mean streamflow for each month
monthly_streamflow_rcp45 = df_rcp45.groupby(['Year', 'Month'])['streamflow'].mean().unstack(level=0)

# Extract year and month from the historical data
df['Year'] = df.index.year
df['Month'] = df.index.month

# Calculate the average monthly streamflow for the historical data
monthly_streamflow_actual_avg = df.groupby('Month')['streamflow'].mean()

# Plot the data with color gradient
fig, ax = plt.subplots(figsize=(14, 8))
colors = plt.cm.viridis(np.linspace(0, 1, len(monthly_streamflow_rcp45.columns)))
for i, year in enumerate(monthly_streamflow_rcp45.columns):
    ax.plot(monthly_streamflow_rcp45.index, monthly_streamflow_rcp45[year], color=colors[i], alpha=0.3)

# Plot the average historical data
ax.plot(monthly_streamflow_actual_avg.index, monthly_streamflow_actual_avg, color='red', alpha=0.3, label='Historical Average')

ax.set_title('Monthly Streamflow by Year (RCP 4.5)')
ax.set_xlabel('Month')
ax.set_ylabel('Streamflow')
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=monthly_streamflow_rcp45.columns.min(), vmax=monthly_streamflow_rcp45.columns.max()))
sm.set_array([])
fig.colorbar(sm, ax=ax, label='Year')
ax.legend()
plt.show()

# Extract year and month from the index
df_rcp85['Year'] = df_rcp85.index.year
df_rcp85['Month'] = df_rcp85.index.month

# Group by year and month, then calculate the mean streamflow for each month
monthly_streamflow_rcp85 = df_rcp85.groupby(['Year', 'Month'])['streamflow'].mean().unstack(level=0)

# Plot the data with color gradient
fig, ax = plt.subplots(figsize=(14, 8))
colors = plt.cm.viridis(np.linspace(0, 1, len(monthly_streamflow_rcp85.columns)))
for i, year in enumerate(monthly_streamflow_rcp85.columns):
    ax.plot(monthly_streamflow_rcp85.index, monthly_streamflow_rcp85[year], color=colors[i], alpha=0.5)

# Plot the average historical data
ax.plot(monthly_streamflow_actual_avg.index, monthly_streamflow_actual_avg, color='red', alpha=0.5, label='Historical Average')

ax.set_title('Monthly Streamflow by Year (RCP 8.5)')
ax.set_xlabel('Month')
ax.set_ylabel('Streamflow')
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=monthly_streamflow_rcp85.columns.min(), vmax=monthly_streamflow_rcp85.columns.max()))
sm.set_array([])
fig.colorbar(sm, ax=ax, label='Year')
ax.legend()
plt.show()
No description has been provided for this image
No description has been provided for this image