Image RemovedImage Added
This guide explains all of the elements needed to successfully develop and plug in a new MADlib® module.
...
Code Block |
---|
|
/**
 * @brief Update state with a new data point
 *
 * Folds a single observation into the running mean (avg) and variance (var)
 * with a one-pass online update, so no running sum of the values is kept.
 *
 * This operator reads numRows but does not modify it.
 * NOTE(review): the caller presumably increments numRows after this call --
 * confirm against the transition function.
 *
 * Deliberately not a template: the argument is a plain double, and an unused
 * template parameter could never be deduced for an expression like
 * `state += x`.
 *
 * @param x The new data point
 * @return Reference to this state
 */
AvgVarTransitionState &operator+=(const double x) {
    const double diff = (x - avg);
    const double normalizer = static_cast<double>(numRows + 1);
    // online update mean
    this->avg += diff / normalizer;
    // online update variance; new_diff is measured against the mean
    // *after* the update above, diff against the mean before it
    const double new_diff = (x - avg);
    const double a = static_cast<double>(numRows) / normalizer;
    this->var = (var * a) + (diff * new_diff) / normalizer;
    // A non-void function must return a value; flowing off the end is
    // undefined behavior.
    return *this;
}
/**
 * @brief Merge with another State object
 *
 * We update mean and variance in an online fashion
 * to avoid an intermediate large sum: each side is weighted by its share
 * of the combined row count instead of accumulating raw sums.
 *
 * Merging a state that holds zero rows leaves this state unchanged.
 *
 * @param inOtherState The state to merge into this one
 * @return Reference to this state
 * @throws std::logic_error if the two states' storage sizes differ
 */
template <class OtherHandle>
AvgVarTransitionState &operator+=(
    const AvgVarTransitionState<OtherHandle> &inOtherState) {
    if (mStorage.size() != inOtherState.mStorage.size())
        throw std::logic_error("Internal error: Incompatible transition "
                               "states");

    const uint64_t numRows_ = static_cast<uint64_t>(inOtherState.numRows);
    // An empty state contributes nothing. Returning early also avoids the
    // 0/0 = NaN weights that would otherwise arise when both states are
    // empty and would poison avg and var for every later merge.
    if (numRows_ == 0)
        return *this;

    const double avg_ = inOtherState.avg;
    const double var_ = inOtherState.var;
    const double totalNumRows = static_cast<double>(numRows + numRows_);
    // p and p_ are the fractions of all rows held by this / the other state
    const double p = static_cast<double>(numRows) / totalNumRows;
    const double p_ = static_cast<double>(numRows_) / totalNumRows;
    const double totalAvg = avg * p + avg_ * p_;
    // offsets of each side's mean from the combined mean
    const double a = avg - totalAvg;
    const double a_ = avg_ - totalAvg;
    numRows += numRows_;
    // combined variance = weighted within-group variances
    //                   + weighted squared offsets of the group means
    var = p * var + p_ * var_ + p * a * a + p_ * a_ * a_;
    avg = totalAvg;
    return *this;
} |
...
Code Block |
---|
|
SELECT madlib.avg_var(second_attack) FROM patients;
-- ************ --
-- Result --
-- ************ --
+-------------------+
| avg_var |
|-------------------|
| [0.5, 0.25, 20.0] |
+-------------------+
-- (average, variance, count) -- |
...
Anchor |
---|
| Adding Iterative Module |
---|
| Adding Iterative Module |
---|
|
Adding An Iterative UDF
...
The example below demonstrates the usage of madlib.logregr_simple_train on the patients table we used earlier. The trained classification model is stored in the table called logreg_mdl and can be viewed using a standard SQL query.
Code Block |
---|
|
SELECT madlib.logregr_simple_train(
'patients', -- source table
'logreg_mdl', -- output table
'second_attack', -- labels
'ARRAY[1, treatment, trait_anxiety]'); -- features
SELECT * FROM logreg_mdl;
-- ************ --
-- Result --
-- ************ --
+--------------------------------------------------+------------------+
| coef | log_likelihood |
|--------------------------------------------------+------------------|
| [-6.27176619714, -0.84168872422, 0.116267554551] | -9.42379 |
+--------------------------------------------------+------------------+ |
...