1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
"""Fit an auto-sklearn regressor to log-transformed CPUE and export predictions.

Reads ``rawdata.xlsx``, trains on 70 % of the rows using ``lon``, ``lat``,
``sst``, ``chla`` and ``doy`` as predictors, predicts for every row, and
writes the input table plus a back-transformed prediction column
(``cpue_pred``) to ``result.xlsx``.
"""

import autosklearn.regression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

rawdata = pd.read_excel('rawdata.xlsx')

# Target is modelled as log10(cpue + 1); the inverse is applied to the
# predictions further down.
Y = rawdata[['cpue']]
Y = np.log10(Y + 1)
X = rawdata[['lon', 'lat', 'sst', 'chla', 'doy']]

# NOTE(review): x_test / y_test are produced here but never used afterwards.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=7
)

# The target is passed as a flattened 1-D array rather than a one-column frame.
y_train_1d = y_train.values.ravel()

automl = autosklearn.regression.AutoSklearnRegressor(
    include_estimators=[
        "random_forest",
        "decision_tree",
        "gradient_boosting",
        "xgradient_boosting",
    ],
    exclude_estimators=None,
    include_preprocessors=["no_preprocessing"],
    exclude_preprocessors=None,
    resampling_strategy='cv',
    resampling_strategy_arguments={'folds': 10},
)
automl.fit(x_train, y_train_1d)

# Both calls return their report instead of printing it, so print explicitly;
# otherwise a script run shows nothing.
print(automl.sprint_statistics())
print(automl.show_models())

# With cross-validation resampling the selected models have to be refit on
# the training data before predict() can be used.
automl.refit(x_train, y_train_1d)

# Predict for every row of the input table and undo the log10(y + 1) transform.
y_pred_log = automl.predict(X)
y_pred = np.power(10, y_pred_log) - 1

# Reuse rawdata's index and concatenate column-wise so that each prediction
# sits next to the row it belongs to (the default axis=0 would append the
# predictions as extra rows underneath the data).
y_pred = pd.DataFrame(y_pred, index=rawdata.index, columns=['cpue_pred'])
result = pd.concat([rawdata, y_pred], axis=1)
result.to_excel('result.xlsx')
|