Commit 4b7d32f9 authored by Luis Fernandez Ruiz's avatar Luis Fernandez Ruiz
Browse files

Delete previous version of files:

- create_img_param.m
- save_sim_sphere.m
- regression_radius_stacked_model.py
parent 38c5201f
function create_img_param(save_img_path, distance, collimation, wav, bg, radius)
%CREATE_IMG_PARAM Simulate one SANS sphere-scattering measurement on D22
%and save the log-scaled detector image(s) as JPG in save_img_path.
%
% Inputs:
%   save_img_path - directory where the .jpg image is written
%   distance      - sample-detector distance (m)
%   collimation   - collimation length (m)
%   wav           - neutron wavelength (Angstrom)
%   bg            - flat background level passed to the scattering model
%   radius        - sphere radius passed to the scattering model
%
% Fix vs. previous version: when the simulation returns several frames,
% every frame was written to the SAME file name, so only the last frame
% survived. Frames now get a '_frame<n>' suffix when frames > 1; the
% single-frame file name is unchanged.

% Fixed instrument configuration
mono_tof = 'mono';
delta_wav = 10; % wavelength spread, percent
instrument = 'd22';
sample_shape = 'rectangular';
sample_size = [10e-3 10e-3]; % m
attenuator = 1;
thickness = 0.1;

% Scattering model parameter structure (monodisperse sphere)
scatter_model = 'Sphere';
params_scatter.poly_fwhm = 0;
params_scatter.contrast = 6e-6;
params_scatter.scale = 0.01;
params_scatter.background = bg;
background_model = 'Empty Cell'; % Empty Cell, Cryostat Ox7T
% NOTE(review): only element 1 (zero) is ever used, so the beam centre is
% never actually shifted here; the randi term is dead but kept for parity
% with save_sim_sphere.
horiz_beam_centre_params = [0 -randi([15 35])];
horiz_beam_centre = horiz_beam_centre_params(1);
params_scatter.radius = radius;

% Encode the configuration into the 39-element numeric parameter vector.
% This vector is only consumed by the (disabled) parameter-file export,
% but the encoding is kept so it can be re-enabled without re-deriving it.
params = zeros(1,39);
if strcmp(instrument,'d11')
    params(1) = 1;
elseif strcmp(instrument, 'd22')
    params(1) = 2;
elseif strcmp(instrument, 'd33')
    params(1) = 3;
end
if strcmp(mono_tof,'mono')
    params(2) = 1;
elseif strcmp(mono_tof, 'tof')
    params(2) = 2;
end
params(3) = distance;
params(4) = collimation;
if strcmp(sample_shape,'rectangular')
    params(5) = 1;
elseif strcmp(sample_shape, 'circular')
    params(5) = 2;
end
if params(5) == 1
    params(6) = sample_size(1);
elseif params(5) == 2
    params(6) = sample_size;
end
params(7) = wav;
if attenuator == 1
    params(8) = 1;
elseif attenuator == 10
    params(8) = 2;
elseif attenuator == 100
    params(8) = 3;
end
params(9) = thickness;
if strcmp(background_model,'Empty Cell')
    params(10) = 1;
elseif strcmp(background_model,'Cryostat Ox7T')
    params(10) = 2;
end
if strcmp(scatter_model,'Sphere')
    params(11) = 1;
    params(12) = params_scatter.radius;
    params(13) = params_scatter.poly_fwhm;
    params(14) = params_scatter.contrast;
    params(15) = params_scatter.scale;
elseif strcmp(scatter_model,'Core-Shell Sphere')
    params(11) = 2;
    params(16) = params_scatter.radius;
    params(17) = params_scatter.poly_fwhm;
    params(18) = params_scatter.shell;
    params(19) = params_scatter.rho_core;
    params(20) = params_scatter.rho_shell;
    params(21) = params_scatter.rho_matrix;
    params(22) = params_scatter.scale;
elseif strcmp(scatter_model,'Guinier')
    params(11) = 3;
    params(23) = params_scatter.rg;
    params(24) = params_scatter.i0;
elseif strcmp(scatter_model,'Porod')
    params(11) = 4;
    params(25) = params_scatter.surf;
    params(26) = params_scatter.contrast;
elseif strcmp(scatter_model,'Flux Line Bragg Peak')
    params(11) = 5;
    params(27) = params_scatter.field;
    params(28) = params_scatter.width;
    params(29) = params_scatter.az_cor;
    params(30) = params_scatter.rad_cor;
    params(31) = params_scatter.pen_depth;
    params(32) = params_scatter.san;
    params(33) = params_scatter.phi;
    params(34) = params_scatter.rot;
elseif strcmp(scatter_model,'Porod + Magnetic Porod')
    params(11) = 6;
    params(35) = params_scatter.surf;
    params(36) = params_scatter.contrast;
    params(37) = params_scatter.magsurf;
    params(38) = params_scatter.magcontrast;
elseif strcmp(scatter_model,'H2O')
    params(11) = 7;
elseif strcmp(scatter_model,'D2O')
    params(11) = 8;
elseif strcmp(scatter_model,'Ribosome')
    params(11) = 9;
elseif strcmp(scatter_model,'Gabel Protien')
    params(11) = 10;
end
params(39) = params_scatter.background;

% Call the instrument simulation
output = sim_calc('instrument',instrument,'mono_tof',mono_tof,'measurement_time',600,'det',distance,'det2',10,'det1',5,...
    'col',collimation,'source_size',[45e-3,55e-3],'sample_size',sample_size,'wav',wav,'delta_wav',delta_wav,...
    'bx',0,'by',0,'attenuator',attenuator,'thickness',thickness,...
    'scatter_model',scatter_model,'scatter_model_params',params_scatter,...
    'background_model',background_model,'background_model_params',[],...
    'blocked_model','Blocked Beam','blocked_model_params',[], 'horiz_beam_centre', horiz_beam_centre);
if isempty(output)
    disp('Sans model calculation failed somewhere: Please consult output text window')
end

% The file name encodes every simulation parameter so the regression code
% can recover them later by parsing the name.
filename2 = [save_img_path '/Sphere' background_model '_col' num2str(collimation) '_dist' num2str(distance) '_radius' num2str(params_scatter.radius) '_poly' num2str(params_scatter.poly_fwhm) '_contrast' num2str(params_scatter.contrast) '_scale' num2str(params_scatter.scale) '_bg' num2str(params_scatter.background) '_wav' num2str(wav) '_thickness' num2str(thickness) '_beamcenter' num2str(horiz_beam_centre) '.jpg'];

% Render each frame off-screen as a log-intensity image and save it.
frames = size(output.data1,3);
for n = 1:frames
    figure('Visible','off')
    data = abs(output.data1(:,:,n));
    pcolor(log10(data)); shading interp;
    ax = gca;
    set(ax,'YTick',[],'XTick',[]) % axis-free image for the CNN/regressor
    imageData = export_fig;
    if frames > 1
        % Give each extra frame its own file instead of overwriting.
        [dir_part, base_name, ext] = fileparts(filename2);
        frame_file = fullfile(dir_part, [base_name '_frame' num2str(n) ext]);
    else
        frame_file = filename2;
    end
    imwrite(imageData, frame_file);
end
close all
end
\ No newline at end of file
function save_sim_sphere(save_img_path)
%SAVE_SIM_SPHERE Sweep SANS sphere-scattering simulations on D22 and save
%each simulated detector image as a log-scaled JPG in save_img_path.
%
% Sweep: 4 wavelengths x 2 background levels x 8 collimation/distance
% pairs x sphere radii 20:8:800 Angstrom. The horizontal beam centre is
% re-randomised for every image. One fprintf line per image echoes the
% simulated parameters to stdout.
%
% Input:
%   save_img_path - directory where the .jpg images are written
%save_img_path = '/home/dpt/fernandez-ruiz/sim/sim_data/Sphere/log_image_moved';
%distance = 18; % D22, distance 1-18m
col_params = [2 2.8 4 5.6 8 11.2 14.4 17.6]; %D22 collimation lengths (m)
dist_params = [1.4 2.8 4 5.6 8 11.2 14.4 17.6]; % detector distances (m), paired index-wise with col_params
background_params = [0 0.05 1]; % water = 1;D2O = 0.05
mono_tof = 'mono';
wav_params = [5 6 8 12]; %i = 2:2:30
delta_wav = 10; %percentage
instrument = 'd22';
sample_shape = 'rectangular';
sample_size = [10e-3 10e-3];
attenuator = 1;
thickness = 0.1;
%Prepare scattering model parameter structure
scatter_model = 'Sphere';
% Candidate sweep grids; only radius/background/wavelength are actually
% swept below, the others stay at the fixed values set before the loops.
radius_params = [20 100 180 260 340 420 500 580 660 740 820]; %randi([20 800],1)
contrast_params = [1 2 3 4 5 6 7 8 9 10 11]/10e5; %randi([1 10],1)/10e5
scale_params = [1 10 20 30 40 50 60 70 80 90 100]/100; %randi([1 100],1)/100
poly_params = [1 20 40 60 80 100 120 140 160 180 200]/10; %randi([0 200],1)/10
params_scatter.radius = 60;
params_scatter.poly_fwhm = 10;
params_scatter.contrast = 6e-6;
params_scatter.scale = 0.01;
params_scatter.background = 0;
background_model = 'Empty Cell'; %Empty Cell,Cryostat Ox7T
horiz_beam_centre_params = [0 -randi([15 35])];
horiz_beam_centre = horiz_beam_centre_params(1);
count = 0;
% The while loop runs exactly once (count: 0 -> 1); it is kept as an easy
% hook for repeating the whole sweep.
while count < 1
params_scatter.radius = 250;
params_scatter.contrast = 6e-6;
params_scatter.scale = 0.01;
%params_scatter.length = randi([50 1000],1);
params_scatter.poly_fwhm = 0; %randi([0 200],1)/10;
for k = 1:4 %1:4 wavelengths
wav = wav_params(k); %i = 2:2:30
for b = 2:3 % backgrounds 0.05 and 1 only (index 1 = 0 is skipped)
params_scatter.background = background_params(b);
for i = 1:8 %1:8 collimation/distance pairs
collimation = col_params(i);
distance = dist_params(i);
for c = 20:8:800 %20:8:800 sphere radius sweep (Angstrom)
params_scatter.radius = c;
% New random horizontal beam-centre shift for every image
horiz_beam_centre = -randi([15 35]);
%%%%%%%calcul%%%%%%%%
% Encode the configuration into the 39-element parameter vector; it is
% only consumed by the commented-out csvwrite below.
params = zeros(1,39);
if strcmp(instrument,'d11')
params(1) = 1;
elseif strcmp(instrument, 'd22')
params(1) = 2;
elseif strcmp(instrument, 'd33')
params(1) = 3;
end
if strcmp(mono_tof,'mono')
params(2) = 1;
elseif strcmp(mono_tof, 'tof')
params(2) = 2;
end
params(3) = distance;
params(4) = collimation;
if strcmp(sample_shape,'rectangular')
params(5) = 1;
elseif strcmp(sample_shape, 'circular')
params(5) = 2;
end
if params(5) == 1
params(6) = sample_size(1);
elseif params(5) == 2
params(6) = sample_size;
end
params(7) = wav;
if attenuator == 1
params(8) = 1;
elseif attenuator == 10
params(8) = 2;
elseif attenuator == 100
params(8) = 3;
end
params(9) = thickness;
if strcmp(background_model,'Empty Cell')
params(10) = 1;
elseif strcmp(background_model,'Cryostat Ox7T')
params(10) = 2;
end
if strcmp(scatter_model,'Sphere')
params(11) = 1;
params(12) = params_scatter.radius;
params(13) = params_scatter.poly_fwhm;
params(14) = params_scatter.contrast;
params(15) = params_scatter.scale;
elseif strcmp(scatter_model,'Core-Shell Sphere')
params(11) = 2;
params(16) = params_scatter.radius;
params(17) = params_scatter.poly_fwhm;
params(18) = params_scatter.shell;
params(19) = params_scatter.rho_core;
params(20) = params_scatter.rho_shell;
params(21) = params_scatter.rho_matrix;
params(22) = params_scatter.scale;
elseif strcmp(scatter_model,'Guinier')
params(11) = 3;
params(23) = params_scatter.rg;
params(24) = params_scatter.i0;
elseif strcmp(scatter_model,'Porod')
params(11) = 4;
params(25) = params_scatter.surf;
params(26) = params_scatter.contrast;
elseif strcmp(scatter_model,'Flux Line Bragg Peak')
params(11) = 5;
params(27) = params_scatter.field;
params(28) = params_scatter.width;
params(29) = params_scatter.az_cor;
params(30) = params_scatter.rad_cor;
params(31) = params_scatter.pen_depth;
params(32) = params_scatter.san;
params(33) = params_scatter.phi;
params(34) = params_scatter.rot;
elseif strcmp(scatter_model,'Porod + Magnetic Porod')
params(11) = 6;
params(35) = params_scatter.surf;
params(36) = params_scatter.contrast;
params(37) = params_scatter.magsurf;
params(38) = params_scatter.magcontrast;
elseif strcmp(scatter_model,'H2O')
params(11) = 7;
elseif strcmp(scatter_model,'D2O')
params(11) = 8;
elseif strcmp(scatter_model,'Ribosome')
params(11) = 9;
elseif strcmp(scatter_model,'Gabel Protien')
params(11) = 10;
end
params(39) = params_scatter.background;
%Call the instrument simulation
output = sim_calc('instrument',instrument,'mono_tof',mono_tof,'measurement_time',600,'det',distance,'det2',10,'det1',5,....
'col',collimation,'source_size',[45e-3,55e-3],'sample_size',sample_size,'wav',wav,'delta_wav',delta_wav,....
'bx',0,'by',0,'attenuator',attenuator,'thickness',thickness,....
'scatter_model',scatter_model,'scatter_model_params',params_scatter,....
'background_model',background_model,'background_model_params',[],....
'blocked_model','Blocked Beam','blocked_model_params',[], 'horiz_beam_centre', horiz_beam_centre);%[45e-3,55e-3]
if isempty(output)
disp('Sans model calculation failed somewhere: Please consult output text window')
end
% The file name encodes every simulation parameter so downstream code can
% recover them by parsing the name.
%filename = ['./sim_data/Sphere/scatter_image/Sphere' background_model '_col' num2str(collimation) '_dist' num2str(distance) '_radius' num2str(params_scatter.radius) '_poly' num2str(params_scatter.poly_fwhm) '_contrast' num2str(params_scatter.contrast) '_scale' num2str(params_scatter.scale) '_bg' num2str(params_scatter.background) '_wav' num2str(wav) '_thickness' num2str(thickness) '_beamcenter' num2str(horiz_beam_centre) '.txt'];
filename2 = [save_img_path '/Sphere' background_model '_col' num2str(collimation) '_dist' num2str(distance) '_radius' num2str(params_scatter.radius) '_poly' num2str(params_scatter.poly_fwhm) '_contrast' num2str(params_scatter.contrast) '_scale' num2str(params_scatter.scale) '_bg' num2str(params_scatter.background) '_wav' num2str(wav) '_thickness' num2str(thickness) '_beamcenter' num2str(horiz_beam_centre) '.jpg'];
%filename3 = ['./sim_data/Sphere/params/Sphere' background_model '_col' num2str(collimation) '_dist' num2str(distance) '_radius' num2str(params_scatter.radius) '_poly' num2str(params_scatter.poly_fwhm) '_contrast' num2str(params_scatter.contrast) '_scale' num2str(params_scatter.scale) '_bg' num2str(params_scatter.background) '_wav' num2str(wav) '_thickness' num2str(thickness) '_beamcenter' num2str(horiz_beam_centre) '.txt'];
data = abs(output.data1(:,:,1));
%fid = fopen(filename,'w');
%fid3 = fopen(filename3,'w');
% Render each frame off-screen as a log-intensity image and save it.
% NOTE(review): every frame is written to the same filename2, so with
% multi-frame output only the last frame survives — confirm frames == 1.
frames = size(output.data1,3);
for n = 1:frames
figure('Visible','off')
data = abs(output.data1(:,:,n));
%figure
pcolor(log10(data));shading interp;
ax = gca;
set(ax,'YTick',[],'XTick',[])
fig = gcf;%s\t%f\t
%csvwrite(filename, data);
%csvwrite(filename3, params);
[imageData, alpha] = export_fig;
imwrite(imageData,filename2);
end
%fclose(fid);
%fclose(fid3);
% Echo the simulated parameters for this image to stdout.
fprintf('%s\t%f\t%s\t%f\t%s\t%f\t%s\t%f\t%s\t%f\t%s\t%f\t%s\t%f\t%s\t%f\t%s\t%f\t%s\t%f\n','col',collimation,'dist', distance, 'radius',params_scatter.radius, 'poly', params_scatter.poly_fwhm, 'contrast',params_scatter.contrast, 'scale',params_scatter.scale,'bg',params_scatter.background, 'wav', wav, 'thickness', thickness, 'beam center', horiz_beam_centre)
close all
end
end
end
end
count = count + 1;
end
\ No newline at end of file
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from matplotlib import pyplot as plt
import seaborn as sb
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
import os
import datetime
from time import strftime
from os.path import join
from sklearn.linear_model import ElasticNet, Lasso, BayesianRidge, LassoLarsIC
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import lightgbm as lgb
# Load the dataset :
# Load train and test data into pandas DataFrames
# Combine train and test data to process them together
def get_data(path, max_radius=400):
    """Load the train/test result CSVs from *path*.

    Parameters
    ----------
    path : str
        Directory containing ``results_train.csv`` and ``results_test.csv``.
    max_radius : int or float, optional
        Rows whose ``radius`` column is >= this value are discarded
        (default 400, the previous hard-coded cut-off).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The filtered train and test frames.
    """
    # get train data
    train_data_path = join(path, 'results_train.csv')
    train = pd.read_csv(train_data_path)
    train = train[train["radius"] < max_radius]
    # get test data
    test_data_path = join(path, 'results_test.csv')
    test = pd.read_csv(test_data_path)
    test = test[test["radius"] < max_radius]
    return train, test
# def get_data(path):
# # get train data
# df_path = join(root, 'results_modif.csv')
# results_test
# df = pd.read_csv(df_path)
# df.drop(['col', 'mean_prediction'], axis=1, inplace=True)
# train, test = train_test_split(df, test_size=0.25, random_state=14)
# return train, test
def get_combined_data(path):
    """Load train/test data and stack them into one feature frame.

    Returns
    -------
    combined : pandas.DataFrame
        Train rows followed by test rows, with the target and id/metadata
        columns ('Id', 'col', 'mean_prediction') removed.
    target : pandas.Series
        The 'radius' column of the training rows.
    train_ID, test_ID : pandas.Series
        The 'Id' columns of the train and test rows.
    """
    # reading train data
    train, test = get_data(path)
    target = train.radius
    train.drop(['radius'], axis=1, inplace=True)
    train_ID = train['Id']
    test_ID = test['Id']
    # DataFrame.append was deprecated and removed in pandas 2.0;
    # pd.concat is the supported equivalent (same row order, kept index).
    combined = pd.concat([train, test])
    combined.reset_index(inplace=True)
    combined.drop(['index', 'Id', 'col', 'mean_prediction'], inplace=True, axis=1)
    return combined, target, train_ID, test_ID
# let’s define a function to get the columns that don’t have any missing values
def get_cols_with_no_nans(df, col_type):
    """Return the names of columns of *df* that contain no missing values.

    Arguments :
        df : The dataframe to process
        col_type :
            num : to only get numerical columns with no nans
            no_num : to only get non-numerical columns with no nans
            all : to get any columns with no nans

    Raises :
        ValueError : if col_type is not one of the three accepted values
            (the previous version printed a message and returned the int 0,
            which silently broke callers iterating over the result).
    """
    if col_type == 'num':
        predictors = df.select_dtypes(exclude=['object'])
    elif col_type == 'no_num':
        predictors = df.select_dtypes(include=['object'])
    elif col_type == 'all':
        predictors = df
    else:
        raise ValueError("col_type must be one of 'num', 'no_num' or 'all'")
    # Keep only the selected columns that are NaN-free in the full frame.
    return [col for col in predictors.columns if not df[col].isnull().any()]
# ------------------------------------------------------------------------------------------------------------
# ---------------------------------------------- PREPARE DATA -----------------------------------------------
# ------------------------------------------------------------------------------------------------------------
# Start timer
start = datetime.datetime.now()
# Directory holding results_train.csv / results_test.csv for this run
root = "/home/dpt/fernandez-ruiz/sim/sim_data/Sphere/log_image/20190506/"
# Load train and test data into pandas DataFrames
# train_data, test_data = get_data(root)
# Combine train and test data to process them together
combined, target, train_ID, test_ID = get_combined_data(root)
combined.describe()  # NOTE(review): result is discarded; useful only in an interactive session
# Get the columns that do not have any missing values .
num_cols = get_cols_with_no_nans(combined , 'num')
# cat_cols = get_cols_with_no_nans(combined , 'no_num')
# Categorical feature columns are hard-coded rather than auto-detected.
cat_cols = ['dist', 'wav', 'predicted']
print ('Number of numerical columns with no nan values :',len(num_cols))
print ('Number of nun-numerical columns with no nan values :',len(cat_cols))
# Keep only the hard-coded feature columns for modelling.
combined = combined[cat_cols]
combined.hist(figsize = (12,10))
plt.show()
# The correlation between the features
# train_data['Target'] = target
#
# C_mat = train_data.corr()
# fig = plt.figure(figsize = (15,15))
#
# sb.heatmap(C_mat, vmax = .8, square = True)
# plt.show()
# One Hot Encode The Categorical Features :
def oneHotEncode(df, colNames):
    """One-hot encode the columns *colNames* of *df*.

    Each listed column is replaced by indicator columns named
    ``<col>_<value>`` (appended at the end of the frame). The input
    frame is left unmodified; the encoded frame is returned.
    """
    encoded = df
    for name in colNames:
        indicators = pd.get_dummies(encoded[name], prefix=name)
        # Drop the source column and append its indicator columns.
        encoded = pd.concat([encoded.drop(columns=[name]), indicators], axis=1)
    return encoded
# One-hot encode the hard-coded categorical features of the combined frame.
print('There were {} columns before encoding categorical features'.format(combined.shape[1]))
combined = oneHotEncode(combined, cat_cols)
print('There are {} columns after encoding categorical features'.format(combined.shape[1]))
# Now, split back combined dataFrame to training data and test data
def split_combined(length_train, df=None):
    """Split a stacked train+test frame back into its two parts.

    Parameters
    ----------
    length_train : int
        Number of leading rows that belong to the training set.
    df : pandas.DataFrame, optional
        Frame to split. Defaults to the module-level ``combined`` frame
        (the previous, global-only behaviour).

    Returns
    -------
    (train, test) : the first *length_train* rows and the remainder.
    """
    global combined
    if df is None:
        df = combined
    train = df[:length_train]
    test = df[length_train:]
    return train, test
# The first len(target) rows of the combined frame are the training rows.
len_train = len(target)
train, test = split_combined(len_train)
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------ TESTING SINGLE MODELS ------------------------------------------
# ------------------------------------------------------------------------------------------------------------
def rmsle_cv(model):
    """Cross-validated RMSE of *model* on the global ``train``/``target`` data.

    Returns an array of one RMSE per fold (n_folds shuffled folds,
    fixed seed for reproducibility).

    Bug fix: the previous version passed ``KFold(...).get_n_splits(...)``
    — a plain int — as ``cv``, which silently discarded the shuffle and
    random_state and fell back to unshuffled default folds. The KFold
    object itself must be supplied to cross_val_score.
    """
    kf = KFold(n_folds, shuffle=True, random_state=42)
    rmse = np.sqrt(-cross_val_score(model, train.values, target,
                                    scoring="neg_mean_squared_error", cv=kf))
    return rmse
n_folds = 5  # number of cross-validation folds used by rmsle_cv
# BASE MODELS
# LASSO Regression: This model may be very sensitive to outliers. So we need to made it more robust on them. For that we use the sklearn's Robustscaler() method on pipeline
lasso = make_pipeline(RobustScaler(), Lasso(alpha =0.0005, random_state=1))
# Elastic Net Regression: again made robust to outliers
ENet = make_pipeline(RobustScaler(), ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))
#Kernel Ridge Regression:
KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
# Gradient Boosting Regression: with huber loss that makes it robust to outliers
GBoost = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.05,
                                   max_depth=4, max_features='sqrt',
                                   min_samples_leaf=15, min_samples_split=10,
                                   loss='huber', random_state =5)
# XGBoost: hyper-parameters taken as-is from a Kaggle stacked-regression kernel
model_xgb = xgb.XGBRegressor(colsample_bytree=0.4603, gamma=0.0468,
                             learning_rate=0.05, max_depth=3,
                             min_child_weight=1.7817, n_estimators=2200,
                             reg_alpha=0.4640, reg_lambda=0.8571,
                             subsample=0.5213, silent=1,
                             random_state =7, nthread = -1)
# LightGBM:
model_lgb = lgb.LGBMRegressor(objective='regression',num_leaves=5,
                              learning_rate=0.05, n_estimators=720,
                              max_bin = 55, bagging_fraction = 0.8,
                              bagging_freq = 5, feature_fraction = 0.2319,
                              feature_fraction_seed=9, bagging_seed=9,
                              min_data_in_leaf =6, min_sum_hessian_in_leaf = 11)
# Random Forest baseline
model_rf = RandomForestRegressor(n_estimators = 500, min_samples_leaf=0.005, max_depth = 50, random_state = 42)
# BASE MODELS SCORE: cross-validated RMSE (mean and std over folds) per model
score = rmsle_cv(lasso)
print("\nLasso score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(ENet)
print("ElasticNet score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(KRR)
print("Kernel Ridge score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(GBoost)
print("Gradient Boosting score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(model_xgb)
print("Xgboost score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(model_lgb)
print("LGBM score: {:.4f} ({:.4f})\n" .format(score.mean(), score.std()))
score = rmsle_cv(model_rf)
print("RF score: {:.4f} ({:.4f})\n" .format(score.mean(), score.std()))
# ------------------------------------------------------------------------------------------------------------
# -------------------------------------- TESTING AVERAGE STACKED MODELS -------------------------------------
# ------------------------------------------------------------------------------------------------------------
# We build a new class to extend scikit-learn with our model and also to laverage encapsulation and code reuse
# Averaged base models class
class AveragingModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Ensemble regressor that averages the predictions of its base models.

    The constructor keeps the supplied models untouched; ``fit`` trains
    fresh clones so the originals can be reused elsewhere.
    """

    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        """Fit a clone of every base model on (X, y) and return self."""
        self.models_ = [clone(base) for base in self.models]
        for member in self.models_:
            member.fit(X, y)
        return self

    def predict(self, X):
        """Predict with each fitted model and average column-wise."""
        stacked = np.column_stack([member.predict(X) for member in self.models_])
        return stacked.mean(axis=1)
# Average XGBoost and gradient boosting, then cross-validate the ensemble.
averaged_models = AveragingModels(models=(model_xgb, GBoost))
score = rmsle_cv(averaged_models)
print(" Averaged base models score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
# STACKING AVERAGE MODELS CLASS
class StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):
def __init__(self, base_models