In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, mean_squared_error, mean_absolute_error
from mlxtend.plotting import plot_decision_regions
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
In [2]:
import warnings
warnings.filterwarnings('ignore')
Gather Data
In [3]:
# Feature frame: read the averaged-columns CSV (first column is the
# date-parsed index), add a binary `Snow` flag, then drop the raw columns that
# are not kept as model inputs.
df_x = (
    pd.read_csv(
        'data/streamflow_prediction_dataset_averaged_cols.csv',
        index_col=0,
        parse_dates=True,
    )
    .assign(
        # 1 when either WTEQ_* column is strictly positive, otherwise 0
        # (a missing value compares as False and therefore maps to 0).
        Snow=lambda frame: np.where(
            (frame['WTEQ_BisonLake'] > 0) | (frame['WTEQ_McClurePass'] > 0),
            1,
            0,
        )
    )
    .drop(
        columns=[
            'WTEQ_BisonLake',
            'WTEQ_McClurePass',
            'soilmoisture_Avg_2ft',
            'soilmoisture_Avg_4ft',
            'soilmoisture_Avg_20ft',
        ]
    )
)

# Target series: only the `streamflow` column of the full dataset is used.
df_y = pd.read_csv(
    'data/streamflow_prediction_dataset.csv',
    index_col=0,
    parse_dates=True,
)['streamflow']

# Place features and target side by side, aligned on the shared index, then
# persist the combined table and show it.
df = pd.concat([df_x, df_y], axis='columns')
df.to_csv('data/snow_soilmoisture_prediction_dataset.csv')
display(df)
PREC_Avg | TAVG_Avg | soilmoisture_Avg_8ft | Snow | streamflow | |
---|---|---|---|---|---|
date | |||||
2008-03-12 | 26.00 | 24.80 | 17.74 | 1 | 2360.0 |
2008-03-15 | 26.55 | 17.55 | 17.88 | 1 | 2260.0 |
2008-03-17 | 26.70 | 19.35 | 18.04 | 1 | 2260.0 |
2008-03-18 | 26.70 | 17.85 | 18.06 | 1 | 2260.0 |
2008-03-19 | 26.70 | 25.50 | 18.06 | 1 | 2200.0 |
... | ... | ... | ... | ... | ... |
2021-07-23 | 24.20 | 57.50 | 14.60 | 0 | 1170.0 |
2021-07-24 | 24.40 | 55.85 | 14.38 | 0 | 1240.0 |
2021-07-25 | 24.65 | 55.15 | 14.24 | 0 | 1190.0 |
2021-07-26 | 24.65 | 59.10 | 14.26 | 0 | 1170.0 |
2021-07-27 | 24.65 | 61.65 | 14.08 | 0 | 1110.0 |
2996 rows × 5 columns
In [4]:
# Scatter plot of PREC_Avg against TAVG_Avg, with each point coloured by its
# Snow flag: blue for 1, grey for 0.
palette = {1: 'blue', 0: 'grey'}
sns.scatterplot(
    data=df,
    x='PREC_Avg',
    y='TAVG_Avg',
    hue='Snow',
    palette=palette,
).set_title('Precipitation, Average Temperature labeled by Snow')
plt.show()