/*
* 2016-30729, 11th assignment
* Coefficient algorithm was referenced from https://www.codeproject.com/articles/566326/multi-linear-regression-in-java
* JAMA lib was used for matrix computations
*/
package com.sephiroce.math;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import Jama.Matrix;
public class Statistics {
public static Matrix calculate(Matrix X_, Matrix Y) {
Matrix X = new Matrix(X_.getRowDimension(), X_.getColumnDimension() + 1);
for (int i = 0; i < X_.getRowDimension(); i++) {
for (int j = 0; j < X_.getColumnDimension(); j++)
X.set(i, j, X_.get(i, j));
X.set(i, X_.getColumnDimension(), 1.0);
}
Matrix Xtr = X.transpose();
Matrix XXtr = Xtr.times(X); // X'X
Matrix inverse_of_XXtr = XXtr.inverse();
if (inverse_of_XXtr == null) {
System.out
.println("Matrix X'X does not have any inverse. So MLR failed to create the model for these data.");
return null;
}
Matrix XtrY = Xtr.times(Y); // X'Y
return inverse_of_XXtr.times(XtrY); // (X'X)^-1 X'Y
}
public static void main(String[] args) throws IOException {
args = new String[3];
args[0] = "winequality-white.csv.1";
args[1] = "12";
args[2] = "12";
System.out.println("The input file must have headers and be separated by \",\"");
int varN = Integer.parseInt(args[1]);
if (varN > 20) {
System.out.println("!!WARN: The time\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 complexity of this program is O(2^n). You need paient.");
}
if (varN > 64) {
System.err.println(
"!!SORRY: This program can handle only less than 64 bits since it makes the combination of features by using 64bit bits.");
return;
}
BufferedReader br = new BufferedReader(new FileReader(args[0]));
int resId = Integer.parseInt(args[2]);
double maxARS = 0.0;
ArrayList<Double[]> data = new ArrayList<Double[]>();
String line = null;
// Load header
line = br.readLine();
String[] headers = line.split(",");
if (headers.length != varN) {
System.out.println("The number of variables are incorrect");
br.close();
return;
}
double SSE, SST = 0;
double mT = 0;
while ((line = br.readLine()) != null) {
Double[] temp = new Double[varN];
String[] temp2 = line.split(",");
for (int i = 0; i < varN; i++) {
temp[i] = Double.parseDouble(temp2[i]);
if (i == resId - 1)
mT += temp[i];
}
data.add(temp);
&\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0nbsp;}
br.close();
System.out.println(data.size() + " data loaded");
mT /= data.size();
double[][] dataX = new double[data.size()][];
double[][] dataY = new double[data.size()][];
for (int i = 0; i < data.size(); i++) {
dataX[i] = new double[varN - 1];
dataY[i] = new double[1];
for (int j = 0, k = 0; j < varN; j++) {
if (j == resId - 1) {
SST += Math.pow(mT - data.get(i)[j], 2);
dataY[i][0] = data.get(i)[j];
} else {
dataX[i][k++] = data.get(i)[j];
}
}
}
System.out.println(SST);
final Matrix Y = new Matrix(dataY);
// Find best feature : exhaustive search
Integer[] selectedFeature = new Integer[64];
Double[] selectedCoeff = new Double[64];
Integer[] maxSelectedFeature = new Integer[64];
int maxP = 0;
for (long i = (long) Math.pow(2, (varN - 1)); i > 1; i--) {
long temp = i - 1;
int p = 0;
// feature selection
double[][] dataX_ = new double[data.size()][];
for (int j = 0; j < varN; j++) {
if (j == resId - 1)
continue;
if ((temp & 1L) == 1) {
&\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0nbsp; selectedFeature[p++] = j;
}
temp = temp >> 1;
}
for (int k = 0; k < data.size(); k++) {
dataX_[k] = new double[p];
for (int j = 0; j < p; j++) {
dataX_[k][j] = data.get(k)[selectedFeature[j]];
}
}
Matrix coeff = calculate(new Matrix(dataX_), Y);
SSE = 0;
// get coefficient
for (int k = 0; k < data.size(); k++) {
double y_hat = coeff.get(coeff.getRowDimension() - 1, 0);
for (int j = 0; j < coeff.getRowDimension() - 1; j++)
y_hat += data.get(k)[selectedFeature[j]] * coeff.get(j, 0);
SSE += Math.pow(y_hat - data.get(k)[resId - 1], 2);
}
// get adjRSquare
double ars = 1 - (SSE / SST) * ((double) (data.size() - 1) / (double) (data.size() - p - 1));
// isMAX
if (ars > maxARS) {
maxARS = ars;
maxP = p;
\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 System.arraycopy(selectedFeature, 0, maxSelectedFeature, 0, maxP);
for(int k=0;k<p;k++){
selectedCoeff[k] = coeff.get(k, 0);
}
selectedCoeff[p] = coeff.get(p, 0);
}
}
System.out.println(" maxR^2_adj : " + maxARS + "
Selected features are");
System.out.println("bias\t"+selectedCoeff[maxP]);
for (int i = 0; i < maxP; i++) {
System.out.println(headers[maxSelectedFeature[i]]+"\t"+selectedCoeff[i]);
}
}
}