
Commit f8f6c3d

Merge pull request #94 from fabianp/sag_dataset

ENH: allow SAG* objects to take a RowDataset as argument.

2 parents b6bc773 + 8de52a4, commit f8f6c3d
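The merged change lets the SAG* estimators consume a pre-built RowDataset, so the dataset wrapper can be constructed once and shared across fits instead of being rebuilt from X on every call. A minimal usage sketch, assuming this tree's import paths (lightning.impl.dataset_fast, lightning.impl.sag) and the constructor defaults visible in the diff below:

    import numpy as np

    from lightning.impl.dataset_fast import get_dataset
    from lightning.impl.sag import SAGClassifier

    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    y = np.sign(X[:, 0])                  # toy binary target

    ds = get_dataset(X, order="c")        # build the RowDataset once
    clf = SAGClassifier(eta='auto', alpha=1.0)
    clf.fit(ds, y)                        # fit() now accepts the dataset directly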

File tree: 4 files changed (+15610, -585 lines)


lightning/impl/sag.py: 26 additions & 49 deletions
@@ -5,12 +5,11 @@
 
 import numpy as np
 
-from sklearn.utils.extmath import row_norms
 from sklearn.externals.six.moves import xrange
 
 from .base import BaseClassifier, BaseRegressor
 from .dataset_fast import get_dataset
-from .sag_fast import _sag_fit
+from .sag_fast import _sag_fit, get_auto_step_size
 
 from .sgd_fast import ModifiedHuber
 from .sgd_fast import SmoothHinge
@@ -20,50 +19,6 @@
 from .sag_fast import L1Penalty
 
 
-def get_auto_step_size(X, alpha, loss, gamma=None, sample_weight=None):
-    """Compute automatic step size for SAG solver
-    Stepsize computed using the following objective:
-        minimize_w 1 / n_samples * \sum_i loss(w^T x_i, y_i)
-                   + alpha * 0.5 * ||w||^2_2
-    Parameters
-    ----------
-    X : ndarray
-        Array of samples x_i.
-    alpha : float
-        Constant that multiplies the l2 penalty term.
-    loss : string, in {"log", "squared"}
-        The loss function used in SAG solver.
-
-    Returns
-    -------
-    step_size : float
-        Step size used in SAG/SAGA solver.
-    """
-    if sample_weight is None:
-        weighted_norms = row_norms(X, squared=True)
-    else:
-        weighted_norms = sample_weight * row_norms(X, squared=True)
-    L = np.max(weighted_norms)
-    n_samples = X.shape[0]
-
-    if loss == 'log':
-        # inverse Lipschitz constant for log loss
-        lipschitz_constant = 0.25 * L + alpha
-    elif loss == 'squared':
-        lipschitz_constant = L + alpha
-    elif loss == 'modified_huber':
-        lipschitz_constant = 2 * L + alpha
-    elif loss == 'smooth_hinge':
-        lipschitz_constant = L + gamma + alpha
-    elif loss == 'squared_hinge':
-        lipschitz_constant = 2 * L + alpha
-    else:
-        raise ValueError("`auto` stepsize is only available for `squared` or "
-                         "`log` losses (got `%s` loss). Please specify a "
-                         "stepsize." % loss)
-    return 1.0 / lipschitz_constant
-
-
 class _BaseSAG(object):
 
     def _get_loss(self):
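For reference, the removed helper returns the inverse of a per-loss Lipschitz estimate, where L is the largest (sample-weighted) squared row norm of X. A worked numeric sketch of that same logic on a toy matrix; after this commit the computation lives in the Cython module sag_fast and takes the dataset object instead of X:

    import numpy as np
    from sklearn.utils.extmath import row_norms

    X = np.array([[1.0, 2.0],
                  [3.0, 4.0]])
    alpha = 0.01

    L = np.max(row_norms(X, squared=True))  # largest squared row norm: 3**2 + 4**2 = 25
    step_size = 1.0 / (0.25 * L + alpha)    # log-loss branch of the chain above
    print(step_size)                        # 1 / 6.26, roughly 0.1597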
@@ -92,9 +47,10 @@ def _finalize_coef(self):
         self.coef_scale_.fill(1.0)
 
     def _fit(self, X, Y, sample_weight):
-        n_samples, n_features = X.shape
         rng = self._get_random_state()
         adaptive_step_size = False
+        ds = get_dataset(X, order="c")
+        n_samples, n_features = ds.get_n_samples(), ds.get_n_features()
 
         if sample_weight is None:
             sample_weight = np.ones(n_samples, dtype=np.float64)
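With this hunk, the dataset is built once at the top of _fit and the problem dimensions are read through the dataset's accessors instead of X.shape, which is what allows a caller-supplied RowDataset to pass through untouched. A small sketch of those accessors, assuming get_dataset wraps both C-ordered dense arrays and scipy.sparse matrices as elsewhere in this package:

    import numpy as np
    import scipy.sparse as sp

    from lightning.impl.dataset_fast import get_dataset

    X_dense = np.ones((10, 3))
    X_sparse = sp.csr_matrix(X_dense)

    for X in (X_dense, X_sparse):
        ds = get_dataset(X, order="c")
        print(ds.get_n_samples(), ds.get_n_features())  # 10 3, for both inputs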
@@ -103,7 +59,7 @@ def _fit(self, X, Y, sample_weight):
 
         if self.eta is None or self.eta in ('auto', 'line-search'):
             step_size = get_auto_step_size(
-                X, self.alpha, self.loss, self.gamma, sample_weight=sample_weight)
+                ds, self.alpha, self.loss, self.gamma, sample_weight=sample_weight)
             if self.verbose > 0:
                 print("Auto stepsize: %s" % self.eta)
             if self.eta == 'line-search':
@@ -116,7 +72,6 @@ def _fit(self, X, Y, sample_weight):
         penalty = self._get_penalty()
         n_vectors = Y.shape[1]
         n_inner = int(self.n_inner * n_samples)
-        ds = get_dataset(X, order="c")
 
         self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
         self.coef_scale_ = np.ones(n_vectors, dtype=np.float64)
@@ -190,6 +145,17 @@ def __init__(self, eta='auto', alpha=1.0, beta=0.0, loss="smooth_hinge",
         self.is_saga = False
 
     def fit(self, X, y, sample_weight=None):
+        """
+        Parameters
+        ----------
+        X : numpy array, sparse matrix or RowDataset of size (n_samples, n_features)
+        y : numpy array of size (n_samples,)
+        sample_weight : numpy array of size (n_samples,), optional
+
+        Returns
+        -------
+        self
+        """
         if not self.is_saga and self.penalty is not None:
             raise ValueError('Penalties are not supported in SAGClassifier. '
                              'Please use SAGAClassifier instead.')
@@ -306,6 +272,17 @@ def __init__(self, eta='auto', alpha=1.0, beta=0.0, loss="smooth_hinge",
         self.is_saga = False
 
     def fit(self, X, y, sample_weight=None):
+        """
+        Parameters
+        ----------
+        X : numpy array, sparse matrix or RowDataset of size (n_samples, n_features)
+        y : numpy array of size (n_samples,)
+        sample_weight : numpy array of size (n_samples,), optional
+
+        Returns
+        -------
+        self
+        """
         if not self.is_saga and self.penalty is not None:
             raise ValueError('Penalties are not supported in SAGRegressor. '
                              'Please use SAGARegressor instead.')
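As the check above shows, the plain SAG estimators reject any penalty and point users to their SAGA counterparts. A hedged sketch of that split; the SAGARegressor parameters are an assumption modeled on the eta/alpha/beta constructor arguments and the L1Penalty import visible in this file:

    from lightning.impl.sag import SAGRegressor, SAGARegressor

    reg = SAGRegressor(eta='auto', alpha=0.1)               # no penalty: plain SAG
    saga = SAGARegressor(eta='auto', alpha=0.1, beta=0.01,  # SAGA accepts a penalty,
                         penalty='l1')                      # e.g. L1 (assumed name)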
