From 40ad82422a7503ac9996005ce3f1a51c5f3838f5 Mon Sep 17 00:00:00 2001
From: Rafael Escaleira
Date: Tue, 11 Jun 2024 17:47:21 -0400
Subject: [PATCH] Update linear regression class

---
 Package.swift                                 |   5 +-
 .../MachineLearning/LinearRegression.swift    | 122 +++++++++++++++---
 .../MachineLearningTests.swift                |   4 +-
 3 files changed, 110 insertions(+), 21 deletions(-)

diff --git a/Package.swift b/Package.swift
index db12a01..25ce253 100644
--- a/Package.swift
+++ b/Package.swift
@@ -30,9 +30,12 @@ let package = Package(
                 .product(
                     name: "RefdsShared",
                     package: "refds-shared"),
+            ],
+            swiftSettings: [
+                .define("ACCELERATE_NEW_LAPACK")
             ]),
         .testTarget(
             name: "RefdsAlgorithmTests",
-            dependencies: ["RefdsAlgorithm"]),
+            dependencies: ["RefdsAlgorithm"])
     ]
 )
diff --git a/Sources/RefdsAlgorithm/MachineLearning/LinearRegression.swift b/Sources/RefdsAlgorithm/MachineLearning/LinearRegression.swift
index 516cd73..1252662 100644
--- a/Sources/RefdsAlgorithm/MachineLearning/LinearRegression.swift
+++ b/Sources/RefdsAlgorithm/MachineLearning/LinearRegression.swift
@@ -1,27 +1,113 @@
 import Foundation
+import Accelerate
 
-class LinearRegression {
-    private var x: [Double]
-    private var y: [Double]
+/// Ordinary least-squares linear regression fitted through the normal equations.
+///
+/// The coefficients are computed asynchronously on a private queue as soon as the
+/// model is created; `predict(for:)` blocks until that fit has finished.
+public class LinearRegression {
+    private var features: [[NSNumber]]
+    private var targets: [NSNumber]
+    private var coefficients: [Double] = []
 
-    public init(x: [Double], y: [Double]) {
-        self.x = x
-        self.y = y
-    }
+    private let semaphore = DispatchSemaphore(value: 0)
+    private let queue = DispatchQueue(
+        label: "refds.algorithm.linearRegression",
+        qos: .background
+    )
 
-    private func average(_ input: [Double]) -> Double {
-        return input.reduce(0, +) / Double(input.count)
+    /// Creates a model and starts fitting it on the private queue.
+    /// - Parameters:
+    ///   - features: One row per sample; every row must hold the same number of values.
+    ///   - targets: One observed value per sample, in the same order as `features`.
+    public init(
+        features: [[NSNumber]],
+        targets: [NSNumber]
+    ) {
+        self.features = features
+        self.targets = targets
+
+        queue.async { self.makeCoefficients() }
     }
+
+    /// Computes `inverse(Xt * X) * Xt * y` with a leading intercept column and stores it.
+    ///
+    /// Leaves `coefficients` empty when the input is malformed or LAPACK reports a failure,
+    /// and signals the semaphore exactly once on every path so waiting readers are released.
+    private func makeCoefficients() {
+        self.coefficients = []
+        let rows = features.count
+
+        // Every row must hold `cols` values; a short row would make flatX smaller than vDSP reads.
+        guard let cols = features[safe: 0]?.count,
+              rows == targets.count,
+              cols > 0, features.allSatisfy({ $0.count == cols })
+        else {
+            semaphore.signal()
+            return
+        }
+
+        var X = features
+        for i in 0 ..< rows { X[i].insert(1.0, at: 0) }
+
+        let flatX = X.flatMap { $0 }.map { $0.doubleValue }
+        let flatY = targets.map { $0.doubleValue }
+        var coefficients = [Double](repeating: 0.0, count: cols + 1)
+
+        var XT = [Double](repeating: 0.0, count: rows * (cols + 1))
+        vDSP_mtransD(flatX, 1, &XT, 1, vDSP_Length(cols + 1), vDSP_Length(rows))
+
+        var XTX = [Double](repeating: 0.0, count: (cols + 1) * (cols + 1))
+        vDSP_mmulD(XT, 1, flatX, 1, &XTX, 1, vDSP_Length(cols + 1), vDSP_Length(cols + 1), vDSP_Length(rows))
 
-    private func multiply(_ a: [Double], _ b: [Double]) -> [Double] {
-        return zip(a, b).map(*)
+        var N = __CLPK_integer(cols + 1)
+        var pivots = [__CLPK_integer](repeating: 0, count: cols + 1)
+        var workspace = [Double](repeating: 0.0, count: cols + 1)
+        var error: __CLPK_integer = 0
+
+        let _ = withUnsafeMutablePointer(to: &N) {
+            dgetrf_($0, $0, &XTX, $0, &pivots, &error)
+        }
+
+        guard error == 0 else {
+            semaphore.signal()
+            return
+        }
+
+        let _ = withUnsafeMutablePointer(to: &N) {
+            dgetri_($0, &XTX, $0, &pivots, &workspace, $0, &error)
+        }
+
+        guard error == 0 else {
+            semaphore.signal()
+            return
+        }
+
+        var XTY = [Double](repeating: 0.0, count: cols + 1)
+        vDSP_mmulD(XT, 1, flatY, 1, &XTY, 1, vDSP_Length(cols + 1), 1, vDSP_Length(rows))
+        vDSP_mmulD(XTX, 1, XTY, 1, &coefficients, 1, vDSP_Length(cols + 1), 1, vDSP_Length(cols + 1))
+
+        self.coefficients = coefficients
+        semaphore.signal()
     }
-
-    public var result: (Double) -> Double {
-        let sum1 = average(multiply(x, y)) - average(x) * average(y)
-        let sum2 = average(multiply(x, x)) - pow(average(x), 2)
-        let slope = sum1 / sum2
-        let intercept = average(y) - slope * average(x)
-        return { x in intercept + slope * x }
+
+    /// Predicts the target for one sample, waiting for the fit to finish if needed.
+    /// - Parameter features: Feature values in the same order used for training.
+    /// - Returns: The predicted value, or `nil` when no coefficients are available.
+    public func predict(for features: [Double]) -> Double? {
+        semaphore.wait()
+        // Release the semaphore on every exit path so a failed fit cannot deadlock later calls.
+        defer { semaphore.signal() }
+
+        guard !coefficients.isEmpty,
+              var prediction = coefficients[safe: 0]
+        else { return nil }
+
+        for i in features.indices {
+            if let coefficient = coefficients[safe: i + 1] {
+                prediction += coefficient * features[i]
+            }
+        }
+        return prediction
     }
 }
diff --git a/Tests/RefdsAlgorithmTests/MachineLearningTests.swift b/Tests/RefdsAlgorithmTests/MachineLearningTests.swift
index 2d7df44..0f9a41b 100644
--- a/Tests/RefdsAlgorithmTests/MachineLearningTests.swift
+++ b/Tests/RefdsAlgorithmTests/MachineLearningTests.swift
@@ -5,8 +5,8 @@ final class MachineLearningTests: XCTestCase {
     func testLinearRegressionShouldCompletesWithValidValue() {
         let carAge: [Double] = [10, 8, 3, 3, 2, 1]
         let carPrice: [Double] = [500, 400, 7000, 8500, 11000, 10500]
-        let linearRegression = LinearRegression(x: carAge, y: carPrice)
-        let result = linearRegression.result(4)
+        let linearRegression = LinearRegression(features: carAge.map { [$0 as NSNumber] }, targets: carPrice as [NSNumber])
+        let result = linearRegression.predict(for: [4]) ?? .zero
         XCTAssertEqual(Int(result), 6952)
     }
 }