Today we are announcing the general availability of the native PREDICT Transact-SQL function in Azure SQL Database. The PREDICT function allows you to perform scoring in real-time using certain RevoScaleR or revoscalepy models in a SQL query without invoking the R or Python runtime.
The PREDICT function support was added in SQL Server 2017. It is a table-valued function that takes a RevoScaleR or revoscalepy model & data (in the form of a table or view or query) as inputs and generates predictions based on the machine learning model. More details of the PREDICT function can be found here.
/* Step 1: Setup schema */ drop table if exists iris_data, iris_models; go create table iris_data ( id int not null identity primary key , "Sepal.Length" float not null, "Sepal.Width" float not null , "Petal.Length" float not null, "Petal.Width" float not null , "Species" varchar(100) null ); create table iris_models ( model_name varchar(30) not null primary key, model varbinary(max) not null, native_model varbinary(max) not null ); go /* Step 2: Populate test data from iris dataset in R */ insert into iris_data ("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species") execute sp_execute_external_script @language = N'R' , @script = N'iris_data <- iris;' , @input_data_1 = N'' , @output_data_1_name = N'iris_data'; go /* Step 3: Create procedure for training model */ create or alter procedure generate_iris_model (@trained_model varbinary(max) OUTPUT, @native_trained_model varbinary(max) OUTPUT) as begin execute sp_execute_external_script @language = N'R' , @script = N' # Build decision tree model to predict species based on sepal/petal attributes iris_model <- rxDTree(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data = iris_rx_data); # Serialize model to binary format for storage in SQL Server trained_model <- as.raw(serialize(iris_model, connection=NULL)); # Serialize model to native binary format for scoring using PREDICT function in SQL Server native_trained_model <- rxSerializeModel(iris_model, realtimeScoringOnly = TRUE) ' , @input_data_1 = N' select "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species" from iris_data' , @input_data_1_name = N'iris_rx_data' , @params = N' @trained_model varbinary(max) OUTPUT, @native_trained_model varbinary(max) OUTPUT' , @trained_model = @trained_model OUTPUT , @native_trained_model = @native_trained_model OUTPUT; end; go /* Step 3: Train & store a decision tree model that will predict species of flowers */ declare @model varbinary(max), @native_model varbinary(max); exec generate_iris_model @model OUTPUT, @native_model OUTPUT; delete from iris_models where model_name = 'iris.dtree'; insert into iris_models (model_name, model, native_model) values('iris.dtree', @model, @native_model); select model_name , datalength(model)/1024. as model_size_kb , datalength(native_model)/1024. as native_model_size_kb from iris_models; go /* Step 4: Generate predictions using PREDICT function */ declare @native_model varbinary(max) = (select native_model from iris_models where model_name = 'iris.dtree'); select p.*, d.Species as "Species.Actual", d.id from PREDICT(MODEL = @native_model, DATA = dbo.iris_data as d) with(setosa_Pred float, versicolor_Pred float, virginica_Pred float) as p; go