Commit 8362071a authored by Luis Fernandez Ruiz's avatar Luis Fernandez Ruiz
Browse files

Update readme and code

parent 22cd13d5
......@@ -239,8 +239,7 @@ The structure of the file is:
* error for sample's parameters: distance between real and predicted sample's parameter value.
* **Original_misclassified.txt:** file (.txt format) produced by copying and pasting the logs generated by [retrain.py](python/retrain.py)
when option *--print_misclassified_images* is set to *True*. It is used in
[misclassified_images.py](python/misclassified_images.py) to reclassify images.
* Inside **Sphere** folder, with *results.csv* and *suggest.csv* files, we can also find the histograms of radius error.
[misclassified_images.py](python/misclassified_images.py) to reclassify images.
### imgs
It contains two folders (one for each particle model). Each one of them has two folders: one with **simulated** scattered
......@@ -264,7 +263,17 @@ particles and the type of regression it has been applied in it.
The number in the name specifies the cluster for which that regression model applies. e.g. Sphere_rf_cluster_0.pkl
applies to *"bad guinier's"* scattered images (cluster 0).
## Datasets
All the datasets with which we have generated the master *results.csv* files located in [doc_files](doc_files) are in:
/home/dpt/sci_share/ScatterImage/fernandez-ruiz/
\ No newline at end of file
/home/dpt/sci_share/ScatterImage/fernandez-ruiz/
Besides, we can find a folder **results_logs**. In it, we have again two folders (one for each particle model). In each
one of them we can find several folders:
* **regression_randForest_errorRadius_wholeDataset:** inside **Sphere** folder, we can find the histograms of predicted
radius error when applying a regression (*random forest*) to the images inside [results.csv](doc_files/Sphere/results.csv)
* **Contribution of params:** images for selecting which sample parameters are relevant in the final images. To do so,
we generate images keeping all the parameters fixed except the one we are studying.
* **ImagesDistribution:** plots the distribution of images in [results.csv](doc_files/Sphere/results.csv) based on their
radius, CNN prediction, wavelength and distance. In this way we can figure out how to obtain Guinier, one ring images...
This diff is collapsed.
......@@ -208,7 +208,8 @@ start = datetime.datetime.now()
# ------------------------ VARIABLE DEFINITION ------------------------
# INTERNAL VARIABLES
reg_model_type = "rf" # random forest
database_path = "/home/dpt/fernandez-ruiz/TF_Results/GUI/"
database_path = "/users/fernandez-ruiz/scatteringimage/models/Sphere/" # path where output_graph and output_labels are
# (CNN model files from retrain.py), master results.csv and regression models are (in a folder whose name is the type of regression model)
GUI_img_path = "GUI_imgs"
results_file = "results.csv"
tmp_folder = "tmp"
......@@ -226,7 +227,7 @@ print("SCATTERED MODEL chosen: %s" % (scatter_model.upper())) # print user deci
if scatter_model == "Sphere":
sample_params_list = ["radius"]
elif scatter_model == "Core-Shell Sphere":
sample_params_list = ["radius", "shell"]
sample_params_list = ["radius", "rhocore", "rhoshell"]
print("-------------------------------------------------------\n")
# Categ searched
......@@ -332,7 +333,7 @@ for param in categ_param:
# ------------------------ DATABASE INFO ------------------------
# We have a master file with all possible combinations of sample and instrument parameters. And the cluster prediction for each one
database_df = pd.read_csv(join(database_path, scatter_model, results_file))
database_df = pd.read_csv(join(database_path, results_file))
# Extract unique values for each column
distinct_instr_values, reg_columns = distinct_values(database_df, categ_param, predicted_cluster)
# Our regression model differentiates between categ and non categorical variables. Only bg and mean predict are
......@@ -362,9 +363,9 @@ reg_X = X_non_categ + reg_X
# ------------------------ REGRESSION MODEL ------------------------
# Load regression model. We have a reg model for each cluster: e.g. Sphere_rf_cluster_0.pkl, Sphere_rf_cluster_1.pkl,...
# Extract the path of the reg models files
reg_files = [join(database_path, scatter_model, reg_model_type, f)
for f in os.listdir(join(database_path, scatter_model, reg_model_type))
if os.path.isfile(join(database_path, scatter_model, reg_model_type, f))]
reg_files = [join(database_path, reg_model_type, f)
for f in os.listdir(join(database_path, reg_model_type))
if os.path.isfile(join(database_path, reg_model_type, f))]
reg_files.sort() # Sort them by name
reg_list = []
for reg_model in reg_files: # append models objects in a script
......@@ -443,8 +444,8 @@ for idx, row in suggest_data.iterrows():
# We apply label_image to suggested images we have just created. In this way, we classify them into folders with name of clusters
subprocess.run(["python", join(python_script_path, "label_classify_image_folder.py"),
"--dir", join(database_path, tmp_folder), "--graph", join(database_path, scatter_model, "output_graph.pb"),
"--labels", join(database_path, scatter_model, "output_labels.txt"),
"--dir", join(database_path, tmp_folder), "--graph", join(database_path, "output_graph.pb"),
"--labels", join(database_path, "output_labels.txt"),
"--input_layer", "Placeholder", "--output_layer", "final_result",
"--results_path", join(database_path, tmp_folder, results_file),
"--sample_params_list", *sample_params_list, "--silence", "True"])
......
......@@ -135,8 +135,8 @@ def obtain_parameter_val(file_name, parameter):
if __name__ == "__main__":
input_height = 224 #299
input_width = 224 #299
input_height = 299 #224 #299
input_width = 299 #224 #299
input_mean = 0
input_std = 255
input_layer = "Placeholder"
......
......@@ -38,7 +38,7 @@ tmp_folder = "tmp" # tmp file. It is going to be created inside 'img_path'
save_img_sug_folder = "Classif_by_categ" # folder where we are going to create suggestes images. It is going to be created inside 'img_path'
python_script_path = "/users/fernandez-ruiz/scatteringimage/python/" # Where are python scripts. Remember that all of them should be in the same folder
matlab_path = "/users/fernandez-ruiz/scatteringimage/matlab/" # Where are matlab scripts. Remember that all of them should be in the same folder
retrain_model_path = "/home/dpt/fernandez-ruiz/TF_Results/GUI_Core/" # "output_labels.py", "output_graph.pb" should be in this folder. (files from retrain.py)
retrain_model_path = "/users/fernandez-ruiz/scatteringimage/models/Sphere/" # "output_labels.py", "output_graph.pb" should be in this folder. (files from retrain.py)
categ_search = "1" # [0: bad guinier, 1: good guinier, 2: one ring, 3: two or three rings, 4: four or five rings,
# 5: more than five rings, 6: bad background images, 7: background image]
num_clusters = "8" # number of clusters we are going to study
......
......@@ -239,8 +239,8 @@ if __name__ == "__main__":
# results_path = "/home/dpt/fernandez-ruiz/sim/sim_data/Sphere/log_image/20190531_resize/results.csv"
# suggest_path = "/home/dpt/fernandez-ruiz/sim/sim_data/Sphere/log_image/20190531_resize/suggest5.csv"
# matlab_path = "/users/fernandez-ruiz/scatteringimage/matlab/"
# retrain_model_path = "/home/dpt/fernandez-ruiz/TF_Results/sphere_7_improved/" # "output_labels.py", "output_graph.pb" should be in this folder. (files from retrain.py)
# python_script_path = "/users/fernandez-ruiz/scatteringimage/python/"
# retrain_model_path = "/users/fernandez-ruiz/scatteringimage/models/Sphere/" # "output_labels.py", "output_graph.pb" should be in this folder. (files from retrain.py)
# tmp_folder = "/home/dpt/fernandez-ruiz/sim/sim_data/Sphere/log_image/20190531_resize/tmp"
# scatter_model = "Sphere" # Define the model
# categ_search = 1
......@@ -322,9 +322,9 @@ if __name__ == "__main__":
# Save regression model
if save_reg_model == "True":
model_name = scatter_model + "_" + reg_type + "_cluster" + str(i) + ".pkl" # Model name definition
if not os.path.isdir(join(retrain_model_path, scatter_model, reg_type)): # If the dir does not exists...
os.makedirs(join(retrain_model_path, scatter_model, reg_type), exist_ok=True) # create it
joblib_file = join(retrain_model_path, scatter_model, reg_type, model_name)
if not os.path.isdir(join(retrain_model_path, reg_type)): # If the dir does not exists...
os.makedirs(join(retrain_model_path, reg_type), exist_ok=True) # create it
joblib_file = join(retrain_model_path, reg_type, model_name)
joblib.dump(regr[i], joblib_file) # save model
# Plot summary of models
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment