@@ -156,32 +156,32 @@ __verbose__ : int, default=0
156156 import pandas as pd
157157 from sklearn.ensemble import RandomForestClassifier
158158 from boruta import BorutaPy
159-
159+
160160 # load X and y
161161 # NOTE BorutaPy accepts numpy arrays only, hence the .values attribute
162- X = pd.read_csv('my_X_table.csv', index_col=0).values
163- y = pd.read_csv('my_y_vector.csv', index_col=0).values
164-
162+ X = pd.read_csv('examples/test_X.csv', index_col=0).values
163+ y = pd.read_csv('examples/test_y.csv', header=None, index_col=0).values
164+ y = y.ravel()
165+
165166 # define random forest classifier, utilising all cores and
166167 # sampling in proportion to y labels
167168 rf = RandomForestClassifier(n_jobs=-1, class_weight='auto', max_depth=5)
168-
169+
169170 # define Boruta feature selection method
170- feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2)
171-
172- # find all relevant features
171+ feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=1 )
172+
173+ # find all relevant features - 5 features should be selected
173174 feat_selector.fit(X, y)
174-
175- # check selected features
175+
176+ # check selected features - first 5 features are selected
176177 feat_selector.support_
177-
178+
178179 # check ranking of features
179180 feat_selector.ranking_
180-
181+
181182 # call transform() on X to filter it down to selected features
182183 X_filtered = feat_selector.transform(X)
183184
184-
185185## References ##
186186
187187 1. Kursa M., Rudnicki W., "Feature Selection with the Boruta Package", Journal of Statistical Software, Vol. 36, Issue 11, Sep 2010
0 commit comments