model = structure[0] # We're only using the first model here dssp = DSSP(model, file_path) # DSSP for secondary structure
for chain in model: for residue in chain: for atom in residue: coordinates.append(atom.get_coord())
# One-hot encoding of amino acid type amino_acid = [0]*20 if residue.get_resname() in IUPACData.protein_letters_1to3: idx = IUPACData.protein_letters.index(IUPACData.protein_letters_1to3[residue.get_resname()]) amino_acid[idx] = 1 amino_acids.append(amino_acid)
# Secondary structure try: sec_structure.append(dssp[(chain.get_id(), residue.get_id())][2]) except KeyError: sec_structure.append(0) # If residue not in DSSP output, assign a default value
from Bio.PDB import PDBParser, DSSP from Bio.SeqUtils import IUPACData import torch import numpy as np import os
defpdb_files_to_tensors(file_paths): parser = PDBParser(QUIET=True) data = [] for file_path in file_paths: structure = parser.get_structure("pdb", file_path)
model = structure[0] # We're only using the first model here dssp = DSSP(model, file_path) # DSSP for secondary structure
for chain in model: for residue in chain: for atom in residue: coordinates.append(atom.get_coord())
# One-hot encoding of amino acid type amino_acid = [0]*20 if residue.get_resname() in IUPACData.protein_letters_1to3: idx = IUPACData.protein_letters.index(IUPACData.protein_letters_1to3[residue.get_resname()]) amino_acid[idx] = 1 amino_acids.append(amino_acid)
# Secondary structure try: sec_structure.append(dssp[(chain.get_id(), residue.get_id())][2]) except KeyError: sec_structure.append(0) # If residue not in DSSP output, assign a default value