Source Code of org.jmol.adapter.readers.cifpdb.PdbReader

/* $RCSfile$
 * $Author: hansonr $
 * $Date: 2006-10-15 17:34:01 -0500 (Sun, 15 Oct 2006) $
 * $Revision: 5957 $
 *
 * Copyright (C) 2003-2005  Miguel, Jmol Development, www.jmol.org
 *
 * Contact: jmol-developers@lists.sf.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */


package org.jmol.adapter.readers.cifpdb;


import org.jmol.adapter.smarter.*;




import org.jmol.api.JmolAdapter;
import org.jmol.util.Logger;
import org.jmol.util.TextFormat;


import java.util.Hashtable;
import java.util.List;
import java.util.ArrayList;


import javax.vecmath.Matrix4f;


/**
 * PDB file reader.
 *
 *<p>
 * <a href='http://www.rcsb.org'>
 * http://www.rcsb.org
 * </a>
 *
 * @author Miguel, Egon, and Bob (hansonr@stolaf.edu)
 * 
 * symmetry added by Bob Hanson:
 * 
 *  setFractionalCoordinates()
 *  setSpaceGroupName()
 *  setUnitCell()
 *  initializeCartesianToFractional();
 *  setUnitCellItem()
 *  setAtomCoord()
 *  applySymmetryAndSetTrajectory()
 *  
 */


public class PdbReader extends AtomSetCollectionReader {
  private int lineLength;
  // index into atoms array + 1
  // so that 0 can be used for the null value
  private final Hashtable htFormul = new Hashtable();
  private Hashtable htHetero = null;
  private Hashtable htSites = null;
  private String currentGroup3;
  private int currentResno = Integer.MIN_VALUE;
  private Hashtable htElementsInCurrentGroup;
  private int maxSerial;
  private int[] chainAtomCounts;
  private int nUNK;
  private int nRes;
  private boolean isMultiModel;  // MODEL ...
  
 final private static String lineOptions = 
   "ATOM    " + //0
   "HETATM  " + //1
   "MODEL   " + //2
   "CONECT  " + //3
   "HELIX   " + //4,5,6
   "SHEET   " +
   "TURN    " +
   "HET     " + //7
   "HETNAM  " + //8
   "ANISOU  " + //9
   "SITE    " + //10
   "CRYST1  " + //11
   "SCALE1  " + //12,13,14
   "SCALE2  " +
   "SCALE3  " +
   "EXPDTA  " + //15
   "FORMUL  " + //16
   "REMARK  " + //17
   "HEADER  " + //18
   "COMPND  " + //19
   "SOURCE  ";  //20


 private int serial = 0;
 private StringBuffer pdbHeader;
 private int configurationPtr = Integer.MIN_VALUE;
 
 protected void initializeReader() throws Exception {
   atomSetCollection.setIsPDB();
   pdbHeader = (getHeader ? new StringBuffer() : null);
   if (checkFilter("CONF ")) {
     configurationPtr = parseInt(filter, filter.indexOf("CONF ") + 5);
     sbIgnored = new StringBuffer();
     sbSelected = new StringBuffer();
   }
 }


 protected boolean checkLine() throws Exception {
    int ptOption = ((lineLength = line.length()) < 6 ? -1 : lineOptions
        .indexOf(line.substring(0, 6))) >> 3;
    boolean isAtom = (ptOption == 0 || ptOption == 1);
    boolean isModel = (ptOption == 2);
    if (isAtom)
      serial = parseInt(line, 6, 11);
    boolean isNewModel = ((isTrajectory || isSequential) && !isMultiModel && isAtom && serial == 1);
    if (getHeader) {
      if (isAtom || isModel)
        getHeader = false;
      else
        pdbHeader.append(line).append('\n');
    }
    if (isModel || isNewModel) {
      isMultiModel = isModel;
      getHeader = false;
      // PDB is different -- targets actual model number
      int modelNo = (isNewModel ? modelNumber + 1 : getModelNumber());
      // System.out.println(modelNo);
      modelNumber = (bsModels == null ? modelNo : modelNumber + 1);
      if (!doGetModel(modelNumber))
        return checkLastModel();
      atomSetCollection.connectAll(maxSerial);
      if (atomCount > 0)
        applySymmetryAndSetTrajectory();
      // supposedly MODEL is only for NMR
      model(modelNo);
      if (!isAtom)
        return true;
    }
    /*
     * OK, the PDB file format is messed up here, because the above commands are
     * all OUTSIDE of the Model framework. Of course, different models might
     * have different secondary structures, but it is not clear that PDB
     * actually supports this. So you can't concatinate PDB files the way you
     * can CIF files. --Bob Hanson 8/30/06
     */
    if (isMultiModel && !doProcessLines)
      return true;
    if (isAtom) {
      getHeader = false;
      atom(serial);
      return true;
    }
    switch (ptOption) {
    case 3:
      conect();
      return true;
    case 4:
    case 5:
    case 6:
      // if (line.startsWith("HELIX ") || line.startsWith("SHEET ")
      // || line.startsWith("TURN  ")) {
      structure();
      return true;
    case 7:
      het();
      return true;
    case 8:
      hetnam();
      return true;
    case 9:
      anisou();
      return true;
    case 10:
      site();
      return true;
    case 11:
      cryst1();
      return true;
    case 12:
    case 13:
    case 14:
      // if (line.startsWith("SCALE1")) {
      // if (line.startsWith("SCALE2")) {
      // if (line.startsWith("SCALE3")) {
      scale(ptOption - 11);
      return true;
    case 15:
      expdta();
      return true;
    case 16:
      formul();
      return true;
    case 17:
      if (line.startsWith("REMARK 350")) {
        remark350();
        return false;
      }
      if (line.startsWith("REMARK 290")) {
        remark290();
        return false;
      }
      checkLineForScript();
      return true;
    case 18:
      header();
      return true;
    case 19:
      compndOld();
      compndSource(false);
      return true;
    case 20:
      compndSource(true);
      return true;
    }
    return true;
  }


  protected void finalizeReader() throws Exception {
    checkNotPDB();
    atomSetCollection.connectAll(maxSerial);
    if (biomolecules != null && biomolecules.size() > 0
        && atomSetCollection.getAtomCount() > 0) {
      atomSetCollection.setAtomSetAuxiliaryInfo("biomolecules", biomolecules);
      setBiomoleculeAtomCounts();
      if (biomts != null && !checkFilter("NOSYMMETRY")) {
        atomSetCollection.applySymmetry(biomts, notionalUnitCell, applySymmetryToBonds, filter);
      }
    }
    super.finalizeReader();
    if (vCompnds != null)
      atomSetCollection.setAtomSetCollectionAuxiliaryInfo("compoundSource", vCompnds);
    if (htSites != null)// && atomSetCollection.getAtomSetCount() == 1)
      addSites(htSites);
    if (pdbHeader != null)
      atomSetCollection.setAtomSetCollectionAuxiliaryInfo("fileHeader",
          pdbHeader.toString());
    if (configurationPtr > 0) {
      Logger.info(sbSelected.toString());
      Logger.info(sbIgnored.toString());
    }
  }
  
  public void applySymmetryAndSetTrajectory() throws Exception {
    // This speeds up calculation, because no crosschecking
    // No special-position atoms in mmCIF files, because there will
    // be no center of symmetry, no rotation-inversions, 
    // no atom-centered rotation axes, and no mirror or glide planes. 
    atomSetCollection.setCheckSpecial(false);
    super.applySymmetryAndSetTrajectory();
  }


  private void header() {
    if (lineLength < 8)
      return;
    if (lineLength >= 66)
      atomSetCollection.setCollectionName(line.substring(62, 66));
    if (lineLength > 50)
      line = line.substring(0, 50);
    atomSetCollection.setAtomSetCollectionAuxiliaryInfo("CLASSIFICATION", line.substring(7).trim());
  }


  private List vCompnds;
  private Hashtable currentCompnd;
  private String currentKey;
  private Hashtable htMolIds;
  private boolean resetKey = true;
  
  String compnd = null;
  private void compndOld() {
    if (compnd == null)
      compnd = "";
    else
      compnd += " ";
    String s = line;
    if (lineLength > 62)
      s = s.substring(0, 62);
    compnd += s.substring(10).trim();
    atomSetCollection.setAtomSetCollectionAuxiliaryInfo("COMPND", compnd);
  }


  private void compndSource(boolean isSource) {
    if (vCompnds == null) {
      if (isSource)
        return;
      vCompnds = new ArrayList();
      htMolIds = new Hashtable();
      currentCompnd = new Hashtable();
      currentCompnd.put("select", "(*)");
      currentKey = "MOLECULE";
      htMolIds.put("", currentCompnd);
    }
    if (isSource && resetKey) {
      resetKey = false;
      currentKey = "SOURCE";
      currentCompnd = (Hashtable) htMolIds.get("");
    }
    line = line.substring(10, Math.min(lineLength, 72)).trim();
    int pt = line.indexOf(":");
    if (pt < 0 || pt > 0 && line.charAt(pt - 1) == '\\')
      pt = line.length();
    String key = line.substring(0, pt).trim();
    String value = (pt < line.length() ? line.substring(pt + 1) : null);
    if (key.equals("MOL_ID")) {
      if (value == null)
        return;
      if (isSource) {
        currentCompnd = (Hashtable) htMolIds.remove(value);
        return;
      }
      currentCompnd = new Hashtable();
      vCompnds.add(currentCompnd);
      htMolIds.put(value, currentCompnd);
    }
    if (currentCompnd == null)
      return;
    if (value == null) {
      value = (String) currentCompnd.get(currentKey);
      if (value == null)
        value = "";
      value += key;
      if (vCompnds.size() == 0)
        vCompnds.add(currentCompnd);
    } else {
      currentKey = key;
    }
    if (value.endsWith(";"))
      value = value.substring(0, value.length() - 1);
    currentCompnd.put(currentKey, value);
    if (currentKey.equals("CHAIN"))
      currentCompnd.put("select", "(:"
          + TextFormat.simpleReplace(TextFormat
              .simpleReplace(value, ", ", ",:"), " ", "") + ")");
  }


  private void setBiomoleculeAtomCounts() {
    for (int i = biomolecules.size(); --i >= 0;) {
      Hashtable biomolecule = (Hashtable) (biomolecules.get(i));
      String chain = (String) biomolecule.get("chains");
      int nTransforms = ((List) biomolecule.get("biomts")).size();
      int nAtoms = 0;
      for (int j = chain.length() - 1; --j >= 0;)
        if (chain.charAt(j) == ':')
          nAtoms += chainAtomCounts[chain.charAt(j + 1)];
      biomolecule.put("atomCount", new Integer(nAtoms * nTransforms));
    }
  }


/* 
 REMARK 350 BIOMOLECULE: 1                                                       
 REMARK 350 APPLY THE FOLLOWING TO CHAINS: 1, 2, 3, 4, 5, 6,  
 REMARK 350 A, B, C
 REMARK 350   BIOMT1   1  1.000000  0.000000  0.000000        0.00000            
 REMARK 350   BIOMT2   1  0.000000  1.000000  0.000000        0.00000            
 REMARK 350   BIOMT3   1  0.000000  0.000000  1.000000        0.00000            
 REMARK 350   BIOMT1   2  0.309017 -0.809017  0.500000        0.00000            
 REMARK 350   BIOMT2   2  0.809017  0.500000  0.309017        0.00000            
 REMARK 350   BIOMT3   2 -0.500000  0.309017  0.809017        0.00000
 
             
             or, as fount in http://www.ebi.ac.uk/msd-srv/pqs/pqs-doc/macmol/1k28.mmol
             
REMARK 350 AN OLIGOMER OF TYPE :HEXAMERIC : CAN BE ASSEMBLED BY
REMARK 350 APPLYING THE FOLLOWING TO CHAINS:
REMARK 350 A, D
REMARK 350   BIOMT1   1  1.000000  0.000000  0.000000        0.00000
REMARK 350   BIOMT2   1  0.000000  1.000000  0.000000        0.00000
REMARK 350   BIOMT3   1  0.000000  0.000000  1.000000        0.00000
REMARK 350 IN ADDITION APPLY THE FOLLOWING TO CHAINS:
REMARK 350 A, D
REMARK 350   BIOMT1   2  0.000000 -1.000000  0.000000        0.00000
REMARK 350   BIOMT2   2  1.000000 -1.000000  0.000000        0.00000
REMARK 350   BIOMT3   2  0.000000  0.000000  1.000000        0.00000
REMARK 350 IN ADDITION APPLY THE FOLLOWING TO CHAINS:
REMARK 350 A, D
REMARK 350   BIOMT1   3 -1.000000  1.000000  0.000000        0.00000
REMARK 350   BIOMT2   3 -1.000000  0.000000  0.000000        0.00000
REMARK 350   BIOMT3   3  0.000000  0.000000  1.000000        0.00000


*/
 
  private List biomolecules;
  private List biomts;
  
  private void remark350() throws Exception {
    List biomts = null;
    biomolecules = new ArrayList();
    chainAtomCounts = new int[255];
    String title = "";
    String chainlist = "";
    int iMolecule = 0;
    boolean needLine = true;
    Hashtable info = null;
    int nBiomt = 0;
    Matrix4f mIdent = new Matrix4f();
    mIdent.setIdentity();
    while (true) {
      if (needLine)
        readLine();
      else
        needLine = true;
      if (line == null || !line.startsWith("REMARK 350"))
        break;
      try {
        if (line.startsWith("REMARK 350 BIOMOLECULE:")) {
          if (nBiomt > 0)
            Logger.info("biomolecule " + iMolecule + ": number of transforms: "
                + nBiomt);
          info = new Hashtable();
          biomts = new ArrayList();
          iMolecule = parseInt(line.substring(line.indexOf(":") + 1));
          title = line.trim();
          info.put("molecule", new Integer(iMolecule));
          info.put("title", title);
          info.put("chains", "");
          info.put("biomts", biomts);
          biomolecules.add(info);
          nBiomt = 0;
          //continue; need to allow for next IF, in case this is a reconstruction
        }
        if (line.indexOf("APPLY THE FOLLOWING TO CHAINS:") >= 0) {
          if (info == null) {
            // need to initialize biomolecule business first and still flag this section
            // see http://www.ebi.ac.uk/msd-srv/pqs/pqs-doc/macmol/1k28.mmol
            needLine = false;
            line = "REMARK 350 BIOMOLECULE: 1  APPLY THE FOLLOWING TO CHAINS:";
            continue;
          }
          chainlist = ":" + line.substring(41).trim().replace(' ', ':');
          needLine = false;
          while (readLine() != null && line.indexOf("BIOMT") < 0)
            chainlist += ":" + line.substring(11).trim().replace(' ', ':');
          if (checkFilter("BIOMOLECULE " + iMolecule + ";")) {
            setFilter(filter.replace(':', '_') + chainlist);
            Logger.info("filter set to \"" + filter + "\"");
            this.biomts = biomts;
          }
          if (info == null)
            return; //bad file format
          info.put("chains", chainlist);
          continue;
        }
        /*
         0         1         2         3         4         5         6         7
         0123456789012345678901234567890123456789012345678901234567890123456789
         REMARK 350   BIOMT2   1  0.000000  1.000000  0.000000        0.00000
         */
        if (line.startsWith("REMARK 350   BIOMT1 ")) {
          nBiomt++;
          float[] mat = new float[16];
          for (int i = 0; i < 12;) {
            String[] tokens = getTokens();
            mat[i++] = parseFloat(tokens[4]);
            mat[i++] = parseFloat(tokens[5]);
            mat[i++] = parseFloat(tokens[6]);
            mat[i++] = parseFloat(tokens[7]);
            if (i == 4 || i == 8)
              readLine();
          }
          mat[15] = 1;
          Matrix4f m4 = new Matrix4f();
          m4.set(mat);
          if (m4.equals(mIdent))
            biomts.add(0, m4);
          else
            biomts.add(m4);
          continue;
        }
      } catch (Exception e) {
        // probably just 
        this.biomts = null;
        this.biomolecules = null;
        return;
      }
    }
    if (nBiomt > 0)
      Logger.info("biomolecule " + iMolecule + ": number of transforms: "
          + nBiomt);
  }


  /*
REMARK 290                                                                      
REMARK 290 CRYSTALLOGRAPHIC SYMMETRY                                            
REMARK 290 SYMMETRY OPERATORS FOR SPACE GROUP: P 1 21 1                         
REMARK 290                                                                      
REMARK 290      SYMOP   SYMMETRY                                                
REMARK 290     NNNMMM   OPERATOR                                                
REMARK 290       1555   X,Y,Z                                                   
REMARK 290       2555   -X,Y+1/2,-Z                                             
REMARK 290                                                                      
REMARK 290     WHERE NNN -> OPERATOR NUMBER                                     
REMARK 290           MMM -> TRANSLATION VECTOR                                  
REMARK 290                                                                      
REMARK 290 CRYSTALLOGRAPHIC SYMMETRY TRANSFORMATIONS                            
REMARK 290 THE FOLLOWING TRANSFORMATIONS OPERATE ON THE ATOM/HETATM             
REMARK 290 RECORDS IN THIS ENTRY TO PRODUCE CRYSTALLOGRAPHICALLY                
REMARK 290 RELATED MOLECULES.                                                   
REMARK 290   SMTRY1   1  1.000000  0.000000  0.000000        0.00000            
REMARK 290   SMTRY2   1  0.000000  1.000000  0.000000        0.00000            
REMARK 290   SMTRY3   1  0.000000  0.000000  1.000000        0.00000            
REMARK 290   SMTRY1   2 -1.000000  0.000000  0.000000        0.00000            
REMARK 290   SMTRY2   2  0.000000  1.000000  0.000000        9.32505            
REMARK 290   SMTRY3   2  0.000000  0.000000 -1.000000        0.00000            
REMARK 290                                                                      
REMARK 290 REMARK: NULL                                                         


   */
  private void remark290() throws Exception {
    while (readLine() != null && line.startsWith("REMARK 290")) {
      if (line.indexOf("NNNMMM   OPERATOR") >= 0) {
        while (readLine() != null) {
          String[] tokens = getTokens();
          if (tokens.length < 4)
            break;
          setSymmetryOperator(tokens[3]);
        }
      }
    }
  }


  private int atomCount;
  private String lastAtomData;
  private int lastAtomIndex;
  private int iAtom;
  
  private void atom(int serial) {
    Atom atom = new Atom();
    atom.atomName = line.substring(12, 16).trim();
    char ch = line.charAt(16);
    if (ch != ' ')
      atom.alternateLocationID = ch;
    atom.group3 = parseToken(line, 17, 20);
    ch = line.charAt(21);
    if (chainAtomCounts != null)
      chainAtomCounts[ch]++;
    atom.chainID = ch;
    atom.sequenceNumber = parseInt(line, 22, 26);
    atom.insertionCode = JmolAdapter.canonizeInsertionCode(line.charAt(26));


    if (!filterAtom(atom, iAtom++))
      return;


    atom.atomSerial = serial;
    if (serial > maxSerial)
      maxSerial = serial;
    if (atom.group3 == null) {
      if (currentGroup3 != null) {
        currentGroup3 = null;
        currentResno = Integer.MIN_VALUE;
        htElementsInCurrentGroup = null;
      }
    } else if (!atom.group3.equals(currentGroup3) || atom.sequenceNumber != currentResno) {
      currentGroup3 = atom.group3;
      currentResno = atom.sequenceNumber;
      htElementsInCurrentGroup = (Hashtable) htFormul.get(atom.group3);
      nRes++;
      if (atom.group3.equals("UNK"))
        nUNK++;
    }
    boolean isHetero = line.startsWith("HETATM");
    atom.isHetero = isHetero;
    atom.elementSymbol = deduceElementSymbol(isHetero);
    //calculate the charge from cols 79 & 80 (1-based): 2+, 3-, etc
    int charge = 0;
    if (lineLength >= 80) {
      char chMagnitude = line.charAt(78);
      char chSign = line.charAt(79);
      if (chSign >= '0' && chSign <= '7') {
        char chT = chSign;
        chSign = chMagnitude;
        chMagnitude = chT;
      }
      if ((chSign == '+' || chSign == '-' || chSign == ' ')
          && chMagnitude >= '0' && chMagnitude <= '7') {
        charge = chMagnitude - '0';
        if (chSign == '-')
          charge = -charge;
      }
    }
    atom.formalCharge = charge;
    float partialCharge = readPartialCharge();
    if (partialCharge != Float.MAX_VALUE)
      atom.partialCharge = partialCharge;


    setAtomCoord(atom, parseFloat(line, 30, 38), parseFloat(line, 38, 46),
        parseFloat(line, 46, 54));


    atom.radius = readRadius();
    atom.bfactor = readBFactor();
    atom.occupancy = readOccupancy();
    
    lastAtomData = line.substring(6, 26);
    lastAtomIndex = atomSetCollection.getAtomCount();
    if (haveMappedSerials)
      atomSetCollection.addAtomWithMappedSerialNumber(atom);
    else
      atomSetCollection.addAtom(atom);
    if (atomCount++ == 0)
      atomSetCollection.setAtomSetAuxiliaryInfo("isPDB", Boolean.TRUE);
    // note that values are +1 in this serial map
    if (isHetero) {
      if (htHetero != null) {
        atomSetCollection.setAtomSetAuxiliaryInfo("hetNames", htHetero);
        htHetero = null;
      }
    }
  }


  private int lastGroup = Integer.MIN_VALUE;
  private char lastInsertion;
  private char lastAltLoc;
  private int conformationIndex;
  StringBuffer sbIgnored, sbSelected;


  protected boolean filterAtom(Atom atom, int iAtom) {
    if (!super.filterAtom(atom, iAtom))
      return false;
    if (configurationPtr > 0) {
      if (atom.sequenceNumber != lastGroup || atom.insertionCode != lastInsertion) {
        conformationIndex = configurationPtr - 1;
        lastGroup = atom.sequenceNumber;
        lastInsertion = atom.insertionCode;
        lastAltLoc = '\0';
      }
      // ignore atoms that have no designation
      if (atom.alternateLocationID != '\0') {
        // count down until we get the desired index into the list
        String msg = " atom [" + atom.group3 + "]"
                           + atom.sequenceNumber 
                           + (atom.insertionCode == '\0' ? "" : "^" + atom.insertionCode)
                           + (atom.chainID == '\0' ? "" : ":" + atom.chainID)
                           + "." + atom.atomName
                           + "%" + atom.alternateLocationID + "\n";
        if (conformationIndex >= 0 && atom.alternateLocationID != lastAltLoc) {
          lastAltLoc = atom.alternateLocationID;
          conformationIndex--;
        }
        if (conformationIndex < 0 && atom.alternateLocationID != lastAltLoc) {
          sbIgnored.append("ignoring").append(msg);
          return false;
        }
        sbSelected.append("loading").append(msg);
      }
    }
    return true;
  }
  
  protected int readOccupancy() {


    /****************************************************************
     * read the occupancy from cols 55-60 (1-based)
     * should be in the range 0.00 - 1.00
     ****************************************************************/
    int occupancy = 100;
    float floatOccupancy = parseFloat(line, 54, 60);
    if (!Float.isNaN(floatOccupancy))
      occupancy = (int) (floatOccupancy * 100);
    return occupancy;
  }
  
  protected float readBFactor() {
    /****************************************************************
     * read the bfactor from cols 61-66 (1-based)
     ****************************************************************/
    return parseFloat(line, 60, 66);
  }
  
  protected float readPartialCharge() {
    return Float.MAX_VALUE; 
  }
  
  protected float readRadius() {
    return Float.NaN; 
  }
  
  private String deduceElementSymbol(boolean isHetero) {
    if (lineLength >= 78) {
      char ch76 = line.charAt(76);
      char ch77 = line.charAt(77);
      if (ch76 == ' ' && Atom.isValidElementSymbol(ch77))
        return "" + ch77;
      if (Atom.isValidElementSymbolNoCaseSecondChar(ch76, ch77))
        return "" + ch76 + ch77;
    }
    char ch12 = line.charAt(12);
    char ch13 = line.charAt(13);
    if ((htElementsInCurrentGroup == null ||
         htElementsInCurrentGroup.get(line.substring(12, 14)) != null) &&
        Atom.isValidElementSymbolNoCaseSecondChar(ch12, ch13))
      return (isHetero || ch12 != 'H' ? "" + ch12 + ch13 : "H");
    // not a known two-letter code
    if (ch12 == 'H') // added check for PQR files "HD22" for example
      return "H";
    // check for " NZ" for example
    if ((htElementsInCurrentGroup == null ||
         htElementsInCurrentGroup.get("" + ch13) != null) &&
        Atom.isValidElementSymbol(ch13))
      return "" + ch13;
    // check for misplaced "O   " for example
    if ((htElementsInCurrentGroup == null ||
         htElementsInCurrentGroup.get("" + ch12) != null) &&
        Atom.isValidElementSymbol(ch12))
      return "" + ch12;
    return "Xx";
  }


  private StringBuffer sbConect;
  private void conect() {
    // adapted for improper non-crossreferenced files such as 1W7R
    if (sbConect == null)
      sbConect = new StringBuffer();
    int sourceSerial = -1;
    sourceSerial = parseInt(line, 6, 11);
    if (sourceSerial < 0)
      return;
    for (int i = 0; i < 9; i += (i == 5 ? 2 : 1)) {
      int offset = i * 5 + 11;
      int offsetEnd = offset + 5;
      int targetSerial = (offsetEnd <= lineLength ? parseInt(line, offset,
          offsetEnd) : -1);
      if (targetSerial < 0)
        continue;
      int i1;
      boolean isSwapped = (targetSerial < sourceSerial);
      if (isSwapped) {
        i1 = targetSerial;
        targetSerial = sourceSerial;
      } else {
        i1 = sourceSerial;
      }
      String st = ";" + i1 + " " + targetSerial + ";";
      if (sbConect.indexOf(st) >= 0)
        continue;
      sbConect.append(st);
      atomSetCollection.addConnection(new int[] { i1, targetSerial,
          i < 4 ? 1 : JmolAdapter.ORDER_HBOND });
    }
  }


  /*
          1         2         3
0123456789012345678901234567890123456
HELIX    1  H1 ILE      7  LEU     18
HELIX    2  H2 PRO     19  PRO     19
HELIX    3  H3 GLU     23  TYR     29
HELIX    4  H4 THR     30  THR     30
SHEET    1  S1 2 THR     2  CYS     4
SHEET    2  S2 2 CYS    32  ILE    35
SHEET    3  S3 2 THR    39  PRO    41
TURN     1  T1 GLY    42  TYR    44


HELIX     1 H1 ILE A    7  PRO A   19
HELIX     2 H2 GLU A   23  THR A   30
SHEET     1 S1 0 CYS A   3  CYS A   4
SHEET     2 S2 0 CYS A  32  ILE A  35


HELIX  113 113 ASN H  307  ARG H  327  1                                  21    
SHEET    1   A 6 ASP A  77  HIS A  80  0                                        
SHEET    2   A 6 GLU A  47  ILE A  51  1  N  ILE A  48   O  ASP A  77           
SHEET    3   A 6 ARG A  22  ILE A  26  1  N  VAL A  23   O  GLU A  47           


TYPE OF HELIX CLASS NUMBER (COLUMNS 39 - 40)
--------------------------------------------------------------
Right-handed alpha (default) 1
Right-handed omega 2
Right-handed pi 3
Right-handed gamma 4
Right-handed 310 5
Left-handed alpha 6
Left-handed omega 7
Left-handed gamma 8
27 ribbon/helix 9
Polyproline 10


   */
  
  private void structure() {
    int structureType = 0;
    int substructureType = 0;
    int startChainIDIndex;
    int startIndex;
    int endChainIDIndex;
    int endIndex;
    int strandCount = 0;
    if (line.startsWith("HELIX ")) {
      structureType = Structure.PROTEIN_STRUCTURE_HELIX;
      startChainIDIndex = 19;
      startIndex = 21;
      endChainIDIndex = 31;
      endIndex = 33;
      if (line.length() >= 40)
        substructureType = Structure.getHelixType(parseInt(line.substring(38, 40)));
    } else if (line.startsWith("SHEET ")) {
      structureType = Structure.PROTEIN_STRUCTURE_SHEET;
      startChainIDIndex = 21;
      startIndex = 22;
      endChainIDIndex = 32;
      endIndex = 33;
      strandCount = parseInt(line.substring(14, 16));
    } else if (line.startsWith("TURN  ")) {
      structureType = Structure.PROTEIN_STRUCTURE_TURN;
      startChainIDIndex = 19;
      startIndex = 20;
      endChainIDIndex = 30;
      endIndex = 31;
    } else
      return;


    if (lineLength < endIndex + 4)
      return;


    String structureID = line.substring(11, 15).trim();
    int serialID = parseInt(line.substring(7, 10));
    char startChainID = line.charAt(startChainIDIndex);
    int startSequenceNumber = parseInt(line, startIndex, startIndex + 4);
    char startInsertionCode = line.charAt(startIndex + 4);
    char endChainID = line.charAt(endChainIDIndex);
    int endSequenceNumber = parseInt(line, endIndex, endIndex + 4);
    // some files are chopped to remove trailing whitespace
    char endInsertionCode = ' ';
    if (lineLength > endIndex + 4)
      endInsertionCode = line.charAt(endIndex + 4);


    // this should probably call Structure.validateAndAllocate
    // in order to check validity of parameters
    // model number set to -1 here to indicate ALL MODELS
    if (substructureType == 0)
      substructureType = structureType;
    Structure structure = new Structure(-1, structureType, substructureType,
        structureID, serialID, strandCount, startChainID, startSequenceNumber,
        startInsertionCode, endChainID, endSequenceNumber, endInsertionCode);
    atomSetCollection.addStructure(structure);
  }


  private int getModelNumber() {
    try {
      int startModelColumn = 6; // should be 10 0-based
      int endModelColumn = 14;
      if (endModelColumn > lineLength)
        endModelColumn = lineLength;
      return parseInt(line, startModelColumn, endModelColumn);
    } catch (NumberFormatException e) {
      return 0;
    }
  }
  
  private void model(int modelNumber) {
    /****************************************************************
     * mth 2004 02 28
     * note that the pdb spec says:
     * COLUMNS       DATA TYPE      FIELD         DEFINITION
     * ----------------------------------------------------------------------
     *  1 -  6       Record name    "MODEL "
     * 11 - 14       Integer        serial        Model serial number.
     *
     * but I received a file with the serial
     * number right after the word MODEL :-(
     ****************************************************************/
    checkNotPDB();
    sbConect = null;
    haveMappedSerials = false;
    atomSetCollection.newAtomSet();
    atomSetCollection.setAtomSetAuxiliaryInfo("isPDB", Boolean.TRUE);
    atomSetCollection.setAtomSetNumber(modelNumber);
  }


  private void checkNotPDB() {
    if (nRes > 0 && nUNK == nRes)
      atomSetCollection.setAtomSetAuxiliaryInfo("isPDB", Boolean.FALSE);
    nUNK = nRes = 0;
    currentGroup3 = null;
  }


  private void cryst1() throws Exception {
    float a = getFloat(6, 9);
    if (a == 1)
      a = Float.NaN; // 1 for a means no unit cell
    setUnitCell(a, getFloat(15, 9), getFloat(24, 9), getFloat(33,
        7), getFloat(40, 7), getFloat(47, 7));
    setSpaceGroupName(parseTrimmed(line, 55, 66));
  }


  private float getFloat(int ich, int cch) throws Exception {
    return parseFloat(line, ich, ich+cch);
  }


  private void scale(int n) throws Exception {
    int pt = n * 4 + 2;
    setUnitCellItem(pt++,getFloat(10, 10));
    setUnitCellItem(pt++,getFloat(20, 10));
    setUnitCellItem(pt++,getFloat(30, 10));
    setUnitCellItem(pt++,getFloat(45, 10));
  }


  private void expdta() {
    if (line.toUpperCase().indexOf("NMR") >= 0)
      atomSetCollection.setAtomSetCollectionAuxiliaryInfo("isNMRdata", "true");
  }


  private void formul() {
    String groupName = parseToken(line, 12, 15);
    String formula = parseTrimmed(line, 19, 70);
    int ichLeftParen = formula.indexOf('(');
    if (ichLeftParen >= 0) {
      int ichRightParen = formula.indexOf(')');
      if (ichRightParen < 0 || ichLeftParen >= ichRightParen ||
          ichLeftParen + 1 == ichRightParen ) // pick up () case in 1SOM.pdb
        return; // invalid formula;
      formula = parseTrimmed(formula, ichLeftParen + 1, ichRightParen);
    }
    Hashtable htElementsInGroup = (Hashtable)htFormul.get(groupName);
    if (htElementsInGroup == null)
      htFormul.put(groupName, htElementsInGroup = new Hashtable());
    // now, look for atom names in the formula
    next[0] = 0;
    String elementWithCount;
    while ((elementWithCount = parseTokenNext(formula)) != null) {
      if (elementWithCount.length() < 2)
        continue;
      char chFirst = elementWithCount.charAt(0);
      char chSecond = elementWithCount.charAt(1);
      if (Atom.isValidElementSymbolNoCaseSecondChar(chFirst, chSecond))
        htElementsInGroup.put("" + chFirst + chSecond, Boolean.TRUE);
      else if (Atom.isValidElementSymbol(chFirst))
        htElementsInGroup.put("" + chFirst, Boolean.TRUE);
    }
  }
  
  private void het() {
    if (line.length() < 30)
      return;
    if (htHetero == null)
      htHetero = new Hashtable();
    String groupName = parseToken(line, 7, 10);
    if (htHetero.containsKey(groupName))
      return;
    String hetName = parseTrimmed(line, 30, 70);
    htHetero.put(groupName, hetName);
  }
  
  private void hetnam() {
    if (htHetero == null)
      htHetero = new Hashtable();
    String groupName = parseToken(line, 11, 14);
    String hetName = parseTrimmed(line, 15, 70);
    if (groupName == null) {
      Logger.error("ERROR: HETNAM record does not contain a group name: " + line);
      return;
    }
    String htName = (String) htHetero.get(groupName);
    if (htName != null)
      hetName = htName + hetName;
    htHetero.put(groupName, hetName);
    //Logger.debug("hetero: "+groupName+" "+hetName);
  }
  
  /*
 The ANISOU records present the anisotropic temperature factors.


Record Format


COLUMNS        DATA TYPE       FIELD         DEFINITION                  
----------------------------------------------------------------------
 1 -  6        Record name     "ANISOU"                                  


 7 - 11        Integer         serial        Atom serial number.         


13 - 16        Atom            name          Atom name.                  


17             Character       altLoc        Alternate location indicator.                  


18 - 20        Residue name    resName       Residue name.               


22             Character       chainID       Chain identifier.           


23 - 26        Integer         resSeq        Residue sequence number.    


27             AChar           iCode         Insertion code.             


29 - 35        Integer         u[0][0]       U(1,1)                


36 - 42        Integer         u[1][1]       U(2,2)                


43 - 49        Integer         u[2][2]       U(3,3)                


50 - 56        Integer         u[0][1]       U(1,2)                


57 - 63        Integer         u[0][2]       U(1,3)                


64 - 70        Integer         u[1][2]       U(2,3)                


73 - 76        LString(4)      segID         Segment identifier, left-justified.


77 - 78        LString(2)      element       Element symbol, right-justified.


79 - 80        LString(2)      charge        Charge on the atom.       


Details


* Columns 7 - 27 and 73 - 80 are identical to the corresponding ATOM/HETATM record.


* The anisotropic temperature factors (columns 29 - 70) are scaled by a factor of 10**4 (Angstroms**2) and are presented as integers.


* The anisotropic temperature factors are stored in the same coordinate frame as the atomic coordinate records. 
   */
  private boolean  haveMappedSerials;
  
  private void anisou() {
    float[] data = new float[8];
    data[6] = 1; //U not B
    int serial = parseInt(line, 6, 11);
    int index;
    if (line.substring(6, 26).equals(lastAtomData)) {
      index = lastAtomIndex;
    } else {
      if (!haveMappedSerials)
        atomSetCollection.createAtomSerialMap();
      index = atomSetCollection.getAtomSerialNumberIndex(serial);
      haveMappedSerials = true;
    }
    if (index < 0) {
      //normal when filtering
      //System.out.println("ERROR: ANISOU record does not correspond to known atom");
      return;
    }
    Atom atom = atomSetCollection.getAtom(index);
    for (int i = 28, pt = 0; i < 70; i += 7, pt++)
      data[pt] = parseFloat(line, i, i + 7);
    for (int i = 0; i < 6; i++) {
      if (Float.isNaN(data[i])) {
          Logger.error("Bad ANISOU record: " + line);
          return;
      }
      data[i] /= 10000f;
    }
    atomSetCollection.setAnisoBorU(atom, data, 8);
    // Ortep Type 8: D = 2pi^2, C = 2, a*b*
  }
  /*
   * http://www.wwpdb.org/documentation/format23/sect7.html
   * 
 Record Format


COLUMNS       DATA TYPE         FIELD            DEFINITION
------------------------------------------------------------------------
 1 -  6       Record name       "SITE    "
 8 - 10       Integer           seqNum      Sequence number.
12 - 14       LString(3)        siteID      Site name.
16 - 17       Integer           numRes      Number of residues comprising 
                                            site.


19 - 21       Residue name      resName1    Residue name for first residue
                                            comprising site.
23            Character         chainID1    Chain identifier for first residue
                                            comprising site.
24 - 27       Integer           seq1        Residue sequence number for first
                                            residue comprising site.
28            AChar             iCode1      Insertion code for first residue
                                            comprising site.
30 - 32       Residue name      resName2    Residue name for second residue
...
41 - 43       Residue name      resName3    Residue name for third residue
...
52 - 54       Residue name      resName4    Residue name for fourth residue
 
   */
  
  private void site() {
    if (htSites == null)
      htSites = new Hashtable();
    //int seqNum = parseInt(line, 7, 10);
    int nResidues = parseInt(line, 15, 17);
    String siteID = parseTrimmed(line, 11, 14);
    Hashtable htSite = (Hashtable) htSites.get(siteID);
    if (htSite == null) {
      htSite = new Hashtable();
      //htSite.put("seqNum", "site_" + seqNum);
      htSite.put("nResidues", new Integer(nResidues));
      htSite.put("groups", "");
      htSites.put(siteID, htSite);
    }
    String groups = (String)htSite.get("groups");
    for (int i = 0; i < 4; i++) {
      int pt = 18 + i * 11;
      String resName = parseTrimmed(line, pt, pt + 3);
      if (resName.length() == 0)
        break;
      String chainID = parseTrimmed(line, pt + 4, pt + 5);
      String seq = parseTrimmed(line, pt + 5, pt + 9);
      String iCode = parseTrimmed(line, pt + 9, pt + 10);
      groups += (groups.length() == 0 ? "" : ",") + "[" + resName + "]" + seq;
      if (iCode.length() > 0)
        groups += "^" + iCode;
      if (chainID.length() > 0)
        groups += ":" + chainID;
      htSite.put("groups", groups);
    }
  }
}
Source Code of org.jmol.adapter.readers.cifpdb.PdbReader

Related Classes of org.jmol.adapter.readers.cifpdb.PdbReader