Author: bugman Date: Wed Nov 25 18:38:32 2015 New Revision: 28116 URL: http://svn.gna.org/viewcvs/relax?rev=28116&view=rev Log: Added support for 'observer' structures in the structure.pca user function.
This allows a subset of the structures used in the PC analysis to have zero weight so that these structures can be used for comparison purposes. The obs_pipes, obs_models, and obs_molecules arguments have been added to the user function front end. The backend uses this to create an array of weights for each structure. And the lib.structure.pca functions use the zero weights to remove the observer structures from the PC mode calculations. Modified: trunk/lib/structure/pca.py trunk/pipe_control/structure/main.py trunk/user_functions/structure.py Modified: trunk/lib/structure/pca.py URL: http://svn.gna.org/viewcvs/relax/trunk/lib/structure/pca.py?rev=28116&r1=28115&r2=28116&view=diff ============================================================================== --- trunk/lib/structure/pca.py (original) +++ trunk/lib/structure/pca.py Wed Nov 25 18:38:32 2015 @@ -23,7 +23,7 @@ """Module for performing a principle component analysis (PCA).""" # Python module imports. -from numpy import dot, float64, outer, sqrt, zeros +from numpy import array, dot, float64, outer, sqrt, zeros from numpy.linalg import eigh, svd # relax library module imports. @@ -31,11 +31,13 @@ from lib.structure.statistics import calc_mean_structure -def calc_covariance_matrix(coord=None): +def calc_covariance_matrix(coord=None, weights=None): """Calculate the covariance matrix for the structures. @keyword coord: The list of coordinates of all models to superimpose. The first index is the models, the second is the atomic positions, and the third is the xyz coordinates. @type coord: list of numpy rank-2, Nx3 arrays + @keyword weights: The weights for each structure. + @type weights: list of float @return: The covariance matrix and the deviation matrix. @rtype: numpy rank-2 3Nx3N array, numpy rank-2 MxNx3 array """ @@ -43,12 +45,16 @@ # Init. M = len(coord) N = len(coord[0]) + if weights == None: + weights = ones(M, float64) + else: + weights = array(weights, float64) covariance_matrix = zeros((N*3, N*3), float64) deviations = zeros((M, N, 3), float64) mean_struct = zeros((N, 3), float64) # Calculate the mean structure. - calc_mean_structure(coord, mean_struct) + calc_mean_structure(coord, mean_struct, weights=weights) # Loop over the models. for i in range(M): @@ -56,10 +62,10 @@ deviations[i] = coord[i] - mean_struct # Sum the covariance element. - covariance_matrix += outer(deviations[i], deviations[i]) + covariance_matrix += weights[i] * (outer(deviations[i], deviations[i])) # Average. - covariance_matrix /= M + covariance_matrix /= weights.sum() # Return the matrix. return covariance_matrix, deviations @@ -73,11 +79,13 @@ """ -def pca_analysis(coord=None, algorithm='eigen', num_modes=4): +def pca_analysis(coord=None, weights=None, algorithm='eigen', num_modes=4): """Perform the PCA analysis. @keyword coord: The list of coordinates of all models to superimpose. The first index is the models, the second is the atomic positions, and the third is the xyz coordinates. @type coord: list of numpy rank-2, Nx3 arrays + @keyword weights: The weights for each structure. + @type weights: list of float @keyword algorithm: The PCA algorithm to use (either 'eigen' or 'svd'). @type algorithm: str @keyword num_modes: The number of PCA modes to calculate. @@ -91,7 +99,7 @@ N = len(coord[0]) # Calculate the covariance matrix for the structures. - covariance_matrix, deviations = calc_covariance_matrix(coord) + covariance_matrix, deviations = calc_covariance_matrix(coord, weights=weights) # Perform an eigenvalue decomposition of the covariance matrix. if algorithm == 'eigen': Modified: trunk/pipe_control/structure/main.py URL: http://svn.gna.org/viewcvs/relax/trunk/pipe_control/structure/main.py?rev=28116&r1=28115&r2=28116&view=diff ============================================================================== --- trunk/pipe_control/structure/main.py (original) +++ trunk/pipe_control/structure/main.py Wed Nov 25 18:38:32 2015 @@ -21,7 +21,7 @@ # Python module imports. from minfx.generic import generic_minimise -from numpy import array, average, dot, float64, mean, std, zeros +from numpy import array, average, dot, float64, mean, ones, std, zeros from numpy.linalg import norm from os import F_OK, access, getcwd from re import search @@ -1009,25 +1009,31 @@ cdp.N = len(from_mols) -def pca(pipes=None, models=None, molecules=None, atom_id=None, algorithm=None, num_modes=4, format='grace', dir=None): - """PCA analysis of the motions between all the loaded models. - - @keyword pipes: The data pipes to determine the RMSD for. - @type pipes: None or list of str - @keyword models: The list of models to determine the RMSD for. The number of elements must match the pipes argument. If set to None, then all models will be used. - @type models: None or list of lists of int - @keyword molecules: The list of molecules to determine the RMSD for. The number of elements must match the pipes argument. - @type molecules: None or list of lists of str - @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format. - @type atom_id: str or None - @keyword algorithm: The PCA algorithm to use (either 'eigen' or 'svd'). - @type algorithm: str - @keyword num_modes: The number of PCA modes to calculate. - @type num_modes: int - @keyword format: The graph format to use. - @type format: str - @keyword dir: The optional directory to place the graphs into. - @type dir: str +def pca(pipes=None, models=None, molecules=None, obs_pipes=None, obs_models=None, obs_molecules=None, atom_id=None, algorithm=None, num_modes=4, format='grace', dir=None): + """PC analysis of the motions between all the loaded models. + + @keyword pipes: The data pipes to perform the PC analysis on. + @type pipes: None or list of str + @keyword models: The list of models to perform the PC analysis on. The number of elements must match the pipes argument. If set to None, then all models will be used. + @type models: None or list of lists of int + @keyword molecules: The list of molecules to perform the PC analysis on. The number of elements must match the pipes argument. + @type molecules: None or list of lists of str + @keyword obs_pipes: The data pipes in the PC analysis which will have zero weight. These structures are for comparison. + @type obs_pipes: None or list of str + @keyword obs_models: The list of models in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument. If set to None, then all models will be used. + @type obs_models: None or list of lists of int + @keyword obs_molecules: The list of molecules in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument. + @type obs_molecules: None or list of lists of str + @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format. + @type atom_id: str or None + @keyword algorithm: The PCA algorithm to use (either 'eigen' or 'svd'). + @type algorithm: str + @keyword num_modes: The number of PCA modes to calculate. + @type num_modes: int + @keyword format: The graph format to use. + @type format: str + @keyword dir: The optional directory to place the graphs into. + @type dir: str """ # Checks. @@ -1036,8 +1042,24 @@ # Assemble the structural coordinates. coord, ids, object_id_list, model_list, molecule_list, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id, lists=True) - # Perform the PCA analysis. - values, vectors, proj = pca_analysis(coord=coord, algorithm=algorithm, num_modes=num_modes) + # Determine the structure weights. + M = len(coord) + if obs_pipes == None: + obs_pipes = [] + weights = ones(M, float64) + for struct in range(M): + # Is the structure in the 'observing' lists? + for i in range(len(obs_pipes)): + # Matching data pipe. + if object_id_list[struct] == obs_pipes[i]: + # Matching molecules. + if obs_molecules == None or molecule_list[struct] == obs_molecules[i]: + # Matching models. + if obs_models == None or model_list[struct] == obs_models[i]: + weights[struct] = 0.0 + + # Perform the PC analysis. + values, vectors, proj = pca_analysis(coord=coord, weights=weights, algorithm=algorithm, num_modes=num_modes) # Store the values. cdp.structure.pca_values = values @@ -1045,7 +1067,6 @@ cdp.structure.pca_proj = proj # Generate the graphs. - M = len(coord) for mode in range(num_modes - 1): # Assemble the data. data = [[[]]] Modified: trunk/user_functions/structure.py URL: http://svn.gna.org/viewcvs/relax/trunk/user_functions/structure.py?rev=28116&r1=28115&r2=28116&view=diff ============================================================================== --- trunk/user_functions/structure.py (original) +++ trunk/user_functions/structure.py Wed Nov 25 18:38:32 2015 @@ -869,12 +869,12 @@ # The structure.pca user function. uf = uf_info.add_uf('structure.pca') uf.title = "Principle component analysis (PCA) of the motions in an ensemble of structures." -uf.title_short = "PCA analysis of structures." +uf.title_short = "Structural PCA." uf.add_keyarg( name = "pipes", py_type = "str_list", desc_short = "data pipes", - desc = "The data pipes to determine the RMSD for.", + desc = "The data pipes to perform the PC analysis on.", wiz_combo_iter = pipe_names, wiz_read_only = False, can_be_none = True @@ -883,14 +883,37 @@ name = "models", py_type = "int_list_of_lists", desc_short = "model list for each data pipe", - desc = "The list of models for each data pipe to determine the RMSD for. The number of elements must match the pipes argument. If no models are given, then all will be used.", + desc = "The list of models for each data pipe to perform the PC analysis on. The number of elements must match the pipes argument. If no models are given, then all will be used.", can_be_none = True ) uf.add_keyarg( name = "molecules", py_type = "str_list_of_lists", desc_short = "molecule list for each data pipe", - desc = "The list of molecules for each data pipe to determine the RMSD for. The RMSD will only be calculated for atoms with identical residue name and number and atom name. The number of elements must match the pipes argument. If no molecules are given, then all will be used.", + desc = "The list of molecules for each data pipe to perform the PC analysis on. The PCA will only be calculated for atoms with identical residue name and number and atom name. The number of elements must match the pipes argument. If no molecules are given, then all will be used.", + can_be_none = True +) +uf.add_keyarg( + name = "obs_pipes", + py_type = "str_list", + desc_short = "observing data pipes", + desc = "The data pipes in the PC analysis which will have zero weight. These structures are for comparison.", + wiz_combo_iter = pipe_names, + wiz_read_only = False, + can_be_none = True +) +uf.add_keyarg( + name = "obs_models", + py_type = "int_list_of_lists", + desc_short = "observing model list for each data pipe", + desc = "The list of models for each data pipe in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument. If no models are given, then all will be used.", + can_be_none = True +) +uf.add_keyarg( + name = "obs_molecules", + py_type = "str_list_of_lists", + desc_short = "observing molecule list for each data pipe", + desc = "The list of molecules for each data pipe in the PC analysis which will have zero weight. These structures are for comparison. The PCA will only be calculated for atoms with identical residue name and number and atom name. The number of elements must match the pipes argument. If no molecules are given, then all will be used.", can_be_none = True ) uf.add_keyarg( @@ -942,6 +965,7 @@ uf.desc.append(Desc_container()) uf.desc[-1].add_paragraph("Perform a principle component analysis (PCA) for all the chosen structures. 2D graphs of the PC projections will be generated and placed in the specified directory.") uf.desc[-1].add_paragraph(paragraph_multi_struct) +uf.desc[-1].add_paragraph("A subset of the structures can be set as 'observing'. This means that they will have a weight of zero when constructing the covariance matrix and determining its eigenvectors. Therefore the structures will not contribute to the principle components, but will be present and compared to structures used in the analysis.") uf.desc[-1].add_paragraph(paragraph_atom_id) # Prompt examples. uf.desc.append(Desc_container("Prompt examples")) @@ -949,7 +973,7 @@ uf.desc[-1].add_prompt("relax> structure.pca()") uf.backend = pipe_control.structure.main.pca uf.menu_text = "&pca" -uf.wizard_height_desc = 400 +uf.wizard_height_desc = 300 uf.wizard_size = (1000, 750) uf.wizard_image = WIZARD_IMAGE_PATH + 'structure' + sep + '2JK4.png' _______________________________________________ relax (http://www.nmr-relax.com) This is the relax-commits mailing list relax-commits@gna.org To unsubscribe from this list, get a password reminder, or change your subscription options, visit the list information page at https://mail.gna.org/listinfo/relax-commits