Scikit-learn

Concrete-ML is compatible with sklearn APIs such as Pipeline() or GridSearch(), which are popular model selection methods.

Here is a simple example of such a process:

from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from concrete.ml.sklearn.xgb import XGBClassifier


# Get dataset and split into train and test
X, y = load_breast_cancer(return_X_y=True)

# Split the train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=8)

# Define our model
model = XGBClassifier(n_jobs=1, n_bits=3)

# Define the pipeline
# We will normalize the data and apply a PCA before fitting the model
pipeline = Pipeline([("standard_scaler", StandardScaler()), ("pca", PCA()), ("model", model)])

# Define the parameters to tune
param_grid = {
    "pca__n_components": [5, 10, 15],
    "model__max_depth": [2, 3, 5],
    "model__n_estimators": [5, 10, 20],
}

# Instantiate the grid search with 5-fold cross validation on all available cores
grid = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1, scoring="accuracy")

# Launch the grid search
grid.fit(X_train, y_train)

# Print the best parameters found
print(f"Best parameters found: {grid.best_params_}")

# Output:
#   Best parameters found:
#   {'model__max_depth': 5, 'model__n_bits': 6, 'model__n_estimators': 20, 'pca__n_components': 15}

# Currently we only focus on model inference in FHE
# The data transformation will be done in clear (client machine)
# while the model inference will be done in FHE on a server.
# The pipeline can be split into 2 parts:
#   1. data transformation
#   2. estimator
best_pipeline = grid.best_estimator_
data_transformation_pipeline = best_pipeline[:-1]
clf = best_pipeline[-1]

# Transform test set
X_train_transformed = data_transformation_pipeline.transform(X_train)
X_test_transformed = data_transformation_pipeline.transform(X_test)

# Evaluate the model on the test set (no FHE)
y_pred_clear = clf.predict(X_test_transformed)
print(f"Test accuracy: {(y_pred_clear == y_test).mean()}")

# Output:
#   Test accuracy: 0.9521

# Compile the model to FHE
clf.compile(X_train_transformed)

# Run the model in FHE
# Warning: this will take a while.
#          It is recommended to run this with a very small batch of example first
#          (e.g. N_TEST_FHE = 1)
# Note that here the encryption and decryption is done behind the scene.
N_TEST_FHE = 1
y_pred_fhe = clf.predict(X_test_transformed[:N_TEST_FHE], execute_in_fhe=True)

# Assert that FHE predictions are the same a the clear predictions
print(f"{(y_pred_fhe == y_pred_clear[:N_TEST_FHE]).sum()} "
      f"examples over {N_TEST_FHE} have a FHE inference equal to the clear inference.")

Last updated