Review notes (free-form text ahead of the first "diff --git" header):
  * The patch had been collapsed onto a single line; one record per line is
    restored here. Blank context lines and indentation were reconstructed
    from the hunk line counts, so verify whitespace against the target tree
    (or apply with whitespace-insensitive matching).
  * hamming_costs.py: a scaling factor of 0 (all-zero mask or empty ground
    truth) falls back to 1, because soft_margin_loss.py divides by it.
  * linear_costs.py: get_scaling_factor() returns 1.0 when no factor was
    ever set, so cost objects that never call set_scaling_factor() still
    work with the loss.
  * The post-image hashes on the "index" lines of those two files predate
    these amendments.

diff --git a/src/structsvm/hamming_costs.py b/src/structsvm/hamming_costs.py
index 4a6a86a..2b26078 100644
--- a/src/structsvm/hamming_costs.py
+++ b/src/structsvm/hamming_costs.py
@@ -27,3 +27,7 @@ def __init__(
 
         self.set_coefficients(coefficients)
         self.set_offset(offset)
+        # A factor of 0 (all-zero mask or empty ground truth) would make the
+        # loss divide by zero, so fall back to 1 in that case.
+        num_active = np.sum(mask) if mask is not None else ground_truth.size
+        self.set_scaling_factor(num_active or 1)
diff --git a/src/structsvm/linear_costs.py b/src/structsvm/linear_costs.py
index a6ef933..7123ec5 100644
--- a/src/structsvm/linear_costs.py
+++ b/src/structsvm/linear_costs.py
@@ -18,3 +18,10 @@ def get_coefficients(self) -> np.ndarray:
 
     def get_offset(self) -> float:
         return self.offset
+
+    def set_scaling_factor(self, scaling_factor: float) -> None:
+        self.scaling_factor = scaling_factor
+
+    def get_scaling_factor(self) -> float:
+        # Costs that never had a factor set are treated as unscaled.
+        return getattr(self, "scaling_factor", 1.0)
diff --git a/src/structsvm/soft_margin_loss.py b/src/structsvm/soft_margin_loss.py
index 3e0cdec..a96718e 100644
--- a/src/structsvm/soft_margin_loss.py
+++ b/src/structsvm/soft_margin_loss.py
@@ -60,9 +60,14 @@ def __init__(
         self._costs = costs
         self._b = self._costs.get_offset()
         self._g = self._costs.get_coefficients()
+        self._scale = self._costs.get_scaling_factor()
+
+        self._b = self._b / self._scale
+        self._g = self._g / self._scale
+        self._features = self._features / self._scale
 
         # combined features of the ground truth and current y*
-        self._d = features @ ground_truth
+        self._d = self._features @ ground_truth
 
         # setup solver
         self._solver = ilpy.Solver(self._num_variables, ilpy.VariableType.Binary)
@@ -106,7 +111,8 @@ def value_and_gradient(self, w: np.ndarray) -> tuple[float, np.ndarray]:
         solution = self._solver.solve()
 
         # read optimal value L(w)
-        value = solution.get_value()
+        # get_value() excludes the constant term, so add it back
+        value = solution.get_value() + a + self._b
 
         # ∂L(w)/∂w = φ(x')y' - φ(x')y*
         #          = d - e