| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 
 # Fit an auto-sklearn regressor on log10(cpue + 1) using position, environment
 # and day-of-year columns, then write back-transformed predictions for every
 # row of the input spreadsheet to 'result.xlsx'.
 import autosklearn.regression
 from sklearn.model_selection import train_test_split
 import pandas as pd
 import numpy as np

 rawdata = pd.read_excel('rawdata.xlsx')

 # The target is modelled as log10(cpue + 1); predictions are mapped back to
 # the original scale further down with 10 ** y - 1.
 Y = np.log10(rawdata[['cpue']] + 1)
 X = rawdata[['lon', 'lat', 'sst', 'chla', 'doy']]

 # 70/30 split with a fixed seed. The held-out part (x_test, y_test) is not
 # used further in this script.
 x_train, x_test, y_train, y_test = train_test_split(
     X, Y, test_size=0.3, random_state=7
 )

 # NOTE(review): `include_estimators` / `include_preprocessors` (and the
 # "xgradient_boosting" component) look like the keyword names of older
 # auto-sklearn releases - confirm against the installed version.
 automl = autosklearn.regression.AutoSklearnRegressor(
     include_estimators=[
         "random_forest",
         "decision_tree",
         "gradient_boosting",
         "xgradient_boosting",
     ],
     exclude_estimators=None,
     include_preprocessors=["no_preprocessing"],
     exclude_preprocessors=None,
     resampling_strategy='cv',
     resampling_strategy_arguments={'folds': 10},
 )
 automl.fit(x_train, y_train.values.ravel())

 # Both calls return their report instead of printing it, so print explicitly;
 # otherwise nothing is shown when this runs as a plain script.
 print(automl.sprint_statistics())
 print(automl.show_models())

 # The search above used cross-validation; refit the selected models on the
 # whole training set before predicting.
 automl.refit(x_train, y_train.values.ravel())

 # Predict for every input row and undo the log10(y + 1) transform.
 y_pre = automl.predict(X)
 y_pre = np.power(10, y_pre) - 1

 # Reuse X's index and concatenate column-wise so that each prediction lands
 # next to the row it was computed from (the default axis=0 would append the
 # predictions as extra rows below the data).
 y_pre = pd.DataFrame(y_pre, index=X.index, columns=['cpue_pred'])
 result = pd.concat([rawdata, y_pre], axis=1)
 result.to_excel('result.xlsx')
 
 
 
 |