loadcif.h 10 KB
Newer Older
Tobias WEBER's avatar
Tobias WEBER committed
1 2 3 4 5 6 7 8 9 10 11 12
/**
 * get atom positions from cif
 * @author Tobias Weber <tweber@ill.fr>
 * @date Jan-2019
 * @license GPLv3, see 'LICENSE' file
 */

#ifndef __LOAD_CIF_H__
#define __LOAD_CIF_H__

#include <vector>
#include <string>
Tobias WEBER's avatar
Tobias WEBER committed
13
//#include <iostream>
Tobias WEBER's avatar
Tobias WEBER committed
14 15 16 17 18 19 20 21 22 23 24 25
#include <fstream>
#include <sstream>

#include <boost/algorithm/string.hpp>

#include <gemmi/cif.hpp>
#include <gemmi/symmetry.hpp>

#include "libs/_cxx20/math_algos.h"
using namespace m_ops;


26

Tobias WEBER's avatar
Tobias WEBER committed
27 28 29 30 31 32 33 34
template<class t_real=double>
struct Lattice
{
	t_real a, b, c;
	t_real alpha, beta, gamma;
};


35

Tobias WEBER's avatar
Tobias WEBER committed
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
/**
 * removes quotation marks around a string
 */
template<class t_str = std::string>
void remove_quotes(t_str& str)
{
	if(str.length() == 0)
		return;

	if(str[0] == '\'' || str[0] == '\"')
		str.erase(str.begin());
	if(str[str.size()-1] == '\'' || str[str.size()-1] == '\"')
		str.erase(str.begin()+str.size()-1);
}



/**
 * gets the atom positions from the CIF
 */
template<class t_vec, class t_real = typename t_vec::value_type>
std::tuple<std::vector<t_vec>, std::vector<std::string>> 
get_cif_atoms(gemmi::cif::Block& block)
{
	std::vector<t_vec> atoms;
	std::vector<std::string> atomnames;

	// possible names for atom symbol
	const char* loopNames[] =
	{
		"_type_symbol",
		"_symbol"
		"_site_label",
		"_label",
	};

	for(const char* loopName : loopNames)
	{
		atoms.clear();
		atomnames.clear();

		auto tabAtoms = block.find("_atom_site", {loopName, "_fract_x", "_fract_y", "_fract_z"});

		for(std::size_t row=0; row<tabAtoms.length(); ++row)
		{
			auto name = boost::trim_copy(tabAtoms[row][0]);
			remove_quotes(name);
			atomnames.emplace_back(std::move(name));

			const t_real x = m::stoval<t_real>(tabAtoms[row][1]);
			const t_real y = m::stoval<t_real>(tabAtoms[row][2]);
			const t_real z = m::stoval<t_real>(tabAtoms[row][3]);
			atoms.emplace_back(t_vec{{x, y, z}});
		}

		// already found, no need to try another name
		if(tabAtoms.length())
			break;
	}

	return std::make_tuple(atoms, atomnames);
}



Tobias WEBER's avatar
Tobias WEBER committed
101 102 103 104 105 106 107 108
/**
 * gets the symmetry operations from the CIF
 */
template<class t_vec, class t_mat, class t_real = typename t_vec::value_type>
std::vector<t_mat> get_cif_ops(gemmi::cif::Block& block)
{
	std::vector<t_mat> ops;

Tobias WEBER's avatar
Tobias WEBER committed
109 110 111 112 113 114 115 116
	// possible names for symop loop
	const char* loopNames[] =
	{
		"_symmetry_equiv_pos_as_xyz",
		"_space_group_symop_operation_xyz",
	};

	for(const char* loopName : loopNames)
Tobias WEBER's avatar
Tobias WEBER committed
117
	{
Tobias WEBER's avatar
Tobias WEBER committed
118
		ops.clear();
Tobias WEBER's avatar
Tobias WEBER committed
119

Tobias WEBER's avatar
Tobias WEBER committed
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
		//std::cerr << "Trying symop loop name " << loopName << std::endl;
	 	auto colOps = block.find_values(loopName);

		for(std::size_t row=0; row<colOps.length(); ++row)
		{
			auto therow = boost::trim_copy(colOps[row]);
			remove_quotes(therow);

			auto op = gemmi::parse_triplet(therow)/*.wrap()*/;
			auto M = op.float_seitz();

			t_mat mat = m::create<t_mat>({
				std::get<0>(std::get<0>(M)), std::get<1>(std::get<0>(M)), std::get<2>(std::get<0>(M)), std::get<3>(std::get<0>(M)),
				std::get<0>(std::get<1>(M)), std::get<1>(std::get<1>(M)), std::get<2>(std::get<1>(M)), std::get<3>(std::get<1>(M)),
				std::get<0>(std::get<2>(M)), std::get<1>(std::get<2>(M)), std::get<2>(std::get<2>(M)), std::get<3>(std::get<2>(M)),
				std::get<0>(std::get<3>(M)), std::get<1>(std::get<3>(M)), std::get<2>(std::get<3>(M)), std::get<3>(std::get<3>(M)) });

			ops.emplace_back(std::move(mat));
		}
Tobias WEBER's avatar
Tobias WEBER committed
139

Tobias WEBER's avatar
Tobias WEBER committed
140 141 142
		// already found, no need to try another name
		if(colOps.length())
			break;
Tobias WEBER's avatar
Tobias WEBER committed
143 144 145 146 147 148
	}

	return ops;
}


149

Tobias WEBER's avatar
Tobias WEBER committed
150 151
/**
 * gets the symmetry operations from the CIF's space group
152
 * (use m::equals_all to check if space group operations are the same)
Tobias WEBER's avatar
Tobias WEBER committed
153 154 155 156 157 158 159 160 161
 */
template<class t_vec, class t_mat, class t_real = typename t_vec::value_type>
std::vector<t_mat> get_cif_sg_ops(gemmi::cif::Block& block)
{
	std::vector<t_mat> ops;

	if(auto val = block.find_values("_symmetry_space_group_name_H-M"); val.length())
	{
		std::string sgname = boost::trim_copy(val[0]);
Tobias WEBER's avatar
Tobias WEBER committed
162
		remove_quotes(sgname);
Tobias WEBER's avatar
Tobias WEBER committed
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185

		if(auto sg = gemmi::find_spacegroup_by_name(sgname))
		{
			auto symops = sg->operations().all_ops_sorted();
			for(const auto &op : symops)
			{
				auto M = op.float_seitz();

				t_mat mat = m::create<t_mat>({
					std::get<0>(std::get<0>(M)), std::get<1>(std::get<0>(M)), std::get<2>(std::get<0>(M)), std::get<3>(std::get<0>(M)),
					std::get<0>(std::get<1>(M)), std::get<1>(std::get<1>(M)), std::get<2>(std::get<1>(M)), std::get<3>(std::get<1>(M)),
					std::get<0>(std::get<2>(M)), std::get<1>(std::get<2>(M)), std::get<2>(std::get<2>(M)), std::get<3>(std::get<2>(M)),
					std::get<0>(std::get<3>(M)), std::get<1>(std::get<3>(M)), std::get<2>(std::get<3>(M)), std::get<3>(std::get<3>(M)) });

				ops.emplace_back(std::move(mat));
			}
		}
	}

	return ops;
}


186

Tobias WEBER's avatar
Tobias WEBER committed
187 188 189 190
/**
 * loads the lattice parameters and the atom positions from a CIF
 */
template<class t_vec, class t_mat, class t_real = typename t_vec::value_type>
Tobias WEBER's avatar
Tobias WEBER committed
191 192 193 194 195 196 197
std::tuple<
	std::string /* errors and warnings */, 
	std::vector<t_vec> /* basic atom positions */, 
	std::vector<std::vector<t_vec>> /* all generated atoms */, 
	std::vector<std::string> /* atom names */, 
	Lattice<t_real> /* lattice */ ,
	std::vector<t_mat> /* ops */ > 
Tobias WEBER's avatar
Tobias WEBER committed
198
load_cif(const std::string& filename, t_real eps=1e-6)
Tobias WEBER's avatar
Tobias WEBER committed
199 200 201
{
	auto ifstr = std::ifstream(filename);
	if(!ifstr)
Tobias WEBER's avatar
Tobias WEBER committed
202
		return std::make_tuple("Cannot open CIF.", std::vector<t_vec>{}, std::vector<std::vector<t_vec>>{}, std::vector<std::string>{}, Lattice{}, std::vector<t_mat>{});
Tobias WEBER's avatar
Tobias WEBER committed
203 204 205 206 207

	// load CIF
	auto cif = gemmi::cif::read_istream(ifstr, 4096, filename.c_str());

	if(!cif.blocks.size())
Tobias WEBER's avatar
Tobias WEBER committed
208
		return std::make_tuple("No blocks in CIF.", std::vector<t_vec>{}, std::vector<std::vector<t_vec>>{}, std::vector<std::string>{}, Lattice{}, std::vector<t_mat>{});
Tobias WEBER's avatar
Tobias WEBER committed
209 210 211 212 213 214 215

	// get the block
	/*const*/ auto& block = cif.sole_block();


	// lattice
	t_real a{}, b{}, c{}, alpha{}, beta{}, gamma{};
Tobias WEBER's avatar
Tobias WEBER committed
216 217 218 219 220 221
	if(auto val = block.find_values("_cell_length_a"); val.length()) a = m::stoval<t_real>(val[0]);
	if(auto val = block.find_values("_cell_length_b"); val.length()) b = m::stoval<t_real>(val[0]);
	if(auto val = block.find_values("_cell_length_c"); val.length()) c = m::stoval<t_real>(val[0]);
	if(auto val = block.find_values("_cell_angle_alpha"); val.length()) alpha = m::stoval<t_real>(val[0]);
	if(auto val = block.find_values("_cell_angle_beta"); val.length()) beta = m::stoval<t_real>(val[0]);
	if(auto val = block.find_values("_cell_angle_gamma"); val.length()) gamma = m::stoval<t_real>(val[0]);
Tobias WEBER's avatar
Tobias WEBER committed
222 223 224 225 226

	Lattice<t_real> latt{.a=a, .b=b, .c=c, .alpha=alpha, .beta=beta, .gamma=gamma};


	// fractional atom positions
Tobias WEBER's avatar
Tobias WEBER committed
227
	auto [atoms, atomnames] = get_cif_atoms<t_vec>(block);
Tobias WEBER's avatar
Tobias WEBER committed
228 229 230


	// generate all atoms using symmetry ops
Tobias WEBER's avatar
Tobias WEBER committed
231
	std::ostringstream errstr;
Tobias WEBER's avatar
Tobias WEBER committed
232 233 234
	std::vector<std::vector<t_vec>> generatedatoms;
	auto ops = get_cif_ops<t_vec, t_mat, t_real>(block);
	if(!ops.size()) // if ops are not directly given, use standard ops from space group
Tobias WEBER's avatar
Tobias WEBER committed
235 236
	{
		errstr << "Warning: Could not find CIF symops, trying to use space group defaults instead.\n";
Tobias WEBER's avatar
Tobias WEBER committed
237
		ops = get_cif_sg_ops<t_vec, t_mat, t_real>(block);
Tobias WEBER's avatar
Tobias WEBER committed
238 239 240
		if(!ops.size())
			errstr << "Warning: No symops found!\n";
	}
Tobias WEBER's avatar
Tobias WEBER committed
241 242 243 244 245 246

	for(t_vec atom : atoms)
	{
		// make homogeneuous 4-vector
		if(atom.size() == 3) atom.push_back(1);

Tobias WEBER's avatar
Tobias WEBER committed
247 248
		// if no ops are given, just output the raw atom position
		if(!ops.size())
Tobias WEBER's avatar
Tobias WEBER committed
249
		{
Tobias WEBER's avatar
Tobias WEBER committed
250
			generatedatoms.push_back(std::vector<t_vec>{{atom}});
Tobias WEBER's avatar
Tobias WEBER committed
251 252
			continue;
		}
Tobias WEBER's avatar
Tobias WEBER committed
253

Tobias WEBER's avatar
Tobias WEBER committed
254
		std::vector<t_vec> newatoms = m::apply_ops_hom<t_vec, t_mat, t_real>(atom, ops, eps);
Tobias WEBER's avatar
Tobias WEBER committed
255 256 257
		generatedatoms.emplace_back(std::move(newatoms));
	}

Tobias WEBER's avatar
Tobias WEBER committed
258
	return std::make_tuple(errstr.str(), atoms, generatedatoms, atomnames, latt, ops);
Tobias WEBER's avatar
Tobias WEBER committed
259 260
}

Tobias WEBER's avatar
Tobias WEBER committed
261 262 263 264 265 266


/**
 * gets space group description strings and symmetry operations
 */
template<class t_mat, class t_real = typename t_mat::value_type>
Tobias WEBER's avatar
Tobias WEBER committed
267 268 269 270 271
std::vector<std::tuple<
	int,				// sg number 
	std::string, 		// description
	std::vector<t_mat>	// symops
	>>
Tobias WEBER's avatar
Tobias WEBER committed
272
get_sgs(bool bAddNr=true, bool bAddHall=true)
Tobias WEBER's avatar
Tobias WEBER committed
273 274 275 276 277 278
{
	std::vector<std::tuple<int, std::string, std::vector<t_mat>>> sgs;

	for(const auto &sg : gemmi::spacegroup_tables::main)
	{
		std::ostringstream ostrDescr;
Tobias WEBER's avatar
Tobias WEBER committed
279 280 281
		if(bAddNr) ostrDescr << "#" << sg.number << ": ";
		ostrDescr << sg.hm;
		if(bAddHall) ostrDescr << " (" << sg.hall << ")";
Tobias WEBER's avatar
Tobias WEBER committed
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308

		std::vector<t_mat> ops;
		for(const auto &op : sg.operations().all_ops_sorted())
		{
			auto M = op.float_seitz();

			t_mat mat = m::create<t_mat>({
				std::get<0>(std::get<0>(M)), std::get<1>(std::get<0>(M)), std::get<2>(std::get<0>(M)), std::get<3>(std::get<0>(M)),
				std::get<0>(std::get<1>(M)), std::get<1>(std::get<1>(M)), std::get<2>(std::get<1>(M)), std::get<3>(std::get<1>(M)),
				std::get<0>(std::get<2>(M)), std::get<1>(std::get<2>(M)), std::get<2>(std::get<2>(M)), std::get<3>(std::get<2>(M)),
				std::get<0>(std::get<3>(M)), std::get<1>(std::get<3>(M)), std::get<2>(std::get<3>(M)), std::get<3>(std::get<3>(M)) });

			ops.emplace_back(std::move(mat));
		}

		sgs.emplace_back(std::make_tuple(sg.number, ostrDescr.str(), ops));
	}

	std::stable_sort(sgs.begin(), sgs.end(), [](const auto& sg1, const auto& sg2) -> bool
	{
		return std::get<0>(sg1) < std::get<0>(sg2);
	});

	return sgs;
}


Tobias WEBER's avatar
Tobias WEBER committed
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358

/**
 * finds all space groups which transform the initial positions into the final ones
 */
template<class t_vec, class t_mat, class t_real = typename t_mat::value_type>
std::vector<std::tuple<
	int,				// sg number 
	std::string, 		// description
	std::vector<t_mat>	// symops
	>>
find_matching_sgs(
	const std::vector<t_vec>& posInit, const std::vector<t_vec>& _posFinal, 
	t_real eps=1e-6)
{
	std::vector<t_vec> posFinal = m::keep_atoms_in_uc<t_vec, t_real>(_posFinal);


	std::vector<std::tuple<int, std::string, std::vector<t_mat>>> matchingSGs;
	auto sgs = get_sgs<t_mat, t_real>();

	// iterate spacegroups
	for(const auto& [sgNum, sgName, sgOps] : sgs)
	{
		// generate symmetry-equivalent positions
		std::vector<t_vec> generatedpos;

		for(const t_vec& pos : posInit)
		{
			std::vector<t_vec> newpos = m::apply_ops_hom<t_vec, t_mat, t_real>(pos, sgOps, eps);
			generatedpos.insert(generatedpos.end(), newpos.begin(), newpos.end());
		}

		//for(const auto& thepos : generatedpos)
		//	std::cout << thepos << std::endl;

		// filter multiple occupancies in generatedpos
		generatedpos = m::remove_duplicates<t_vec>(generatedpos, eps);


		// no match
		if(!m::equals_all<t_vec>(generatedpos, posFinal, eps, 3))
			continue;

		matchingSGs.emplace_back(std::make_tuple(sgNum, sgName, sgOps));
	}

	return matchingSGs;
}


Tobias WEBER's avatar
Tobias WEBER committed
359
#endif