In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, mean_squared_error, mean_absolute_error
from mlxtend.plotting import plot_decision_regions
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
In [2]:
import warnings
warnings.filterwarnings('ignore')

Gather Data

In [3]:
# Features: the averaged-columns dataset, indexed by its first column with
# that index parsed as dates.
df_x = pd.read_csv(
    'data/streamflow_prediction_dataset_averaged_cols.csv',
    index_col=0,
    parse_dates=True,
)
# Target: only the 'streamflow' column of the full dataset, read the same way.
df_y = pd.read_csv(
    'data/streamflow_prediction_dataset.csv',
    index_col=0,
    parse_dates=True,
)['streamflow']

# Binary Snow flag: 1 where either WTEQ column is positive, otherwise 0
# (rows where both values are missing compare False and therefore get 0).
snow_mask = df_x['WTEQ_BisonLake'].gt(0) | df_x['WTEQ_McClurePass'].gt(0)

# Columns removed from the feature table: the two WTEQ columns that Snow was
# derived from, plus the 2ft/4ft/20ft soil-moisture columns.
dropped_columns = [
    'WTEQ_BisonLake',
    'WTEQ_McClurePass',
    'soilmoisture_Avg_2ft',
    'soilmoisture_Avg_4ft',
    'soilmoisture_Avg_20ft',
]
df_x = df_x.assign(Snow=np.where(snow_mask, 1, 0)).drop(columns=dropped_columns)

# Place features and target side by side, aligned on the index (pandas'
# default outer join: a date present on only one side would produce NaNs).
df = pd.concat([df_x, df_y], axis=1)

# Persist the combined table, then render it as the cell output.
df.to_csv('data/snow_soilmoisture_prediction_dataset.csv')
display(df)
PREC_Avg TAVG_Avg soilmoisture_Avg_8ft Snow streamflow
date
2008-03-12 26.00 24.80 17.74 1 2360.0
2008-03-15 26.55 17.55 17.88 1 2260.0
2008-03-17 26.70 19.35 18.04 1 2260.0
2008-03-18 26.70 17.85 18.06 1 2260.0
2008-03-19 26.70 25.50 18.06 1 2200.0
... ... ... ... ... ...
2021-07-23 24.20 57.50 14.60 0 1170.0
2021-07-24 24.40 55.85 14.38 0 1240.0
2021-07-25 24.65 55.15 14.24 0 1190.0
2021-07-26 24.65 59.10 14.26 0 1170.0
2021-07-27 24.65 61.65 14.08 0 1110.0

2996 rows × 5 columns

In [4]:
# visualize: PREC_Avg against TAVG_Avg, one point per row of df, coloured by
# the Snow flag (1 -> blue, 0 -> grey).
palette = {1: 'blue', 0: 'grey'}
ax = sns.scatterplot(x='PREC_Avg', y='TAVG_Avg', hue='Snow', palette=palette, data=df)
# scatterplot returns the Axes it drew on, so the title is set on it directly.
ax.set_title('Precipitation, Average Temperature labeled by Snow')
plt.show()
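Figure: scatter plot titled "Precipitation, Average Temperature labeled by Snow", with PREC_Avg on the x-axis, TAVG_Avg on the y-axis, and points colored by Snow (1 = blue, 0 = grey).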