#include "pdbReader.h"

// File-local helpers and constants.
// They live in an anonymous namespace so that the class interface declared
// in pdbReader.h does not have to change.
namespace {

// Size of the buffer used to read one line of a PDB file.
const int kLineBufferSize = 1024;

// Length of the suffixes (".pdb" / ".ent") tried by setFilename and stripped
// by setNameFromFilename.
const int kSuffixLength = 4;

// 0-based character offsets of the fixed-column fields this reader extracts
// from ATOM/HETATM records.
const int kAtomNameOffset      = 13;  // compared against the alphabet backbone
const int kResidueNameOffset   = 17;
const int kResidueNameWidth    = 3;
const int kResidueNumberOffset = 23;
const int kResidueNumberWidth  = 3;
const int kXOffset             = 30;
const int kYOffset             = 38;
const int kZOffset             = 46;
const int kCoordinateWidth     = 8;

// Return a new[]-allocated copy of `source` with room for `extra` additional
// characters after the terminator. The caller releases it with delete[].
char* copyString(const char* source, int extra)
{
    const int len = static_cast<int>(strlen(source));
    char* copy = new char[len + extra + 1];
    strncpy(copy, source, len);
    copy[len] = '\0';
    return copy;
}

// True if `line` starts with the ATOM or HETATM record name.
bool isAtomRecord(const char* line)
{
    return strncmp(line, "ATOM", 4) == 0 || strncmp(line, "HETATM", 6) == 0;
}

// True if `line` is an ATOM/HETATM record whose atom-name field starts with
// `backbone`. Lines too short to reach the atom-name column never match, so
// no characters past the terminator are ever inspected.
bool isBackboneAtomRecord(const char* line, const char* backbone, int backboneLen)
{
    if (!isAtomRecord(line)) {
        return false;
    }
    if (static_cast<int>(strlen(line)) < kAtomNameOffset) {
        return false;
    }
    return strncmp(line + kAtomNameOffset, backbone, backboneLen) == 0;
}

// Look up the symbol for a 3-character residue field. If two of the three
// characters are blanks the remaining one is used as a 1-letter code,
// otherwise the whole field is used as a 3-letter code.
Symbol* lookupSymbol(Alphabet* alphabet, char* code)
{
    if (code[0] == ' ' && code[1] == ' ') {
        return alphabet->getSymbol(code[2]);
    }
    if (code[0] == ' ' && code[2] == ' ') {
        return alphabet->getSymbol(code[1]);
    }
    if (code[1] == ' ' && code[2] == ' ') {
        return alphabet->getSymbol(code[0]);
    }
    return alphabet->getSymbol(code);
}

} // namespace

// Constructor
//   Stores the alphabet pointer; all name buffers start out unset (0).
PDBReader::PDBReader(Alphabet* alpha)
    : alphabet(alpha), filename(0), path(0), fullName(0), structureName(0)
{
}

// Destructor
//   Releases the name buffers. They are all allocated with new[], so they
//   must be released with delete[]. The alphabet is not deleted here.
PDBReader::~PDBReader()
{
    delete[] filename;
    delete[] path;
    delete[] fullName;
    delete[] structureName;
}

// setFilename
//   Set the name for the structure file to be read.
//   If the initial name does not correspond to a readable file,
//   the name is retried with the suffixes ".pdb" and ".ent".
//   On success the structure name is derived from the accepted filename
//   (callers can change it later).
//   Returns 1 if a usable file was found; otherwise 0, leaving filename
//   and fullName unset. A null fn is treated as a failure.
int PDBReader::setFilename(char* fn)
{
    // Copy before releasing the old buffers so fn may alias them. The copy
    // reserves room for the longest suffix.
    char* candidate = (fn != 0) ? copyString(fn, kSuffixLength) : 0;

    delete[] filename;
    filename = candidate;
    delete[] fullName;
    fullName = 0;

    if (filename == 0) {
        return 0;
    }

    const int baseLen = static_cast<int>(strlen(filename));
    const char* const suffixes[] = { "", ".pdb", ".ent" };
    const int suffixCount = 3;

    for (int i = 0; i < suffixCount; ++i) {
        // Cut back to the bare name before trying the next suffix.
        filename[baseLen] = '\0';
        strncat(filename, suffixes[i], kSuffixLength);

        // checkFullName leaves fullName at 0 whenever it fails, so each
        // attempt starts from a clean state.
        if (checkFullName() == 1) {
            setNameFromFilename();
            return 1;
        }
    }

    delete[] filename;
    filename = 0;
    return 0;
}

// setPath
//   Store a copy of the directory prefix used to build the full file name.
//   The prefix is concatenated directly with the filename (see
//   checkFullName), so it must already end with a path separator.
//   Returns 1 on success; 0 (path unchanged) if p is null.
int PDBReader::setPath(char* p)
{
    if (p == 0) {
        return 0;
    }

    // Copy first so that p may alias the current path buffer.
    char* copy = copyString(p, 0);
    delete[] path;
    path = copy;
    return 1;
}

// getStructure
//   Read the backbone atoms of the first chain (everything up to the first
//   "TER" record) from the file named by fullName and build a Structure.
//   Warnings about unreadable records and numbering gaps are appended to the
//   file "pdbError" (or written to stderr if that file cannot be opened).
//   structIndex is not used.
//   Returns a new Structure owned by the caller, or 0 if no file has been
//   accepted yet or the file cannot be opened.
Structure* PDBReader::getStructure(int structIndex)
{
    (void)structIndex;

    if (fullName == 0) {
        return 0;
    }
    FILE* infile = fopen(fullName, "r");
    if (infile == NULL) {
        return 0;
    }

    // Fall back to stderr so a missing log file can never cause
    // fprintf/fclose to be called on a null stream.
    FILE* fpError = fopen("pdbError", "a");
    const bool ownsErrorLog = (fpError != NULL);
    if (!ownsErrorLog) {
        fpError = stderr;
    }

    Structure* struc = new Structure(getElementCount(), alphabet);
    if (structureName != 0) {
        struc->setName(structureName);
    }

    // NOTE(review): the original code released this buffer after use, so
    // getBackbone() is assumed to return a new[]-allocated copy -- confirm.
    char* bb = alphabet->getBackbone();
    const int bbLen = static_cast<int>(strlen(bb));

    char line[kLineBufferSize];
    char residueName[kResidueNameWidth + 1];
    char residueNumber[kResidueNumberWidth + 1];

    int pdbIndex = 0;      // residue index within the pdb file
    int elementCount = 0;  // elements added to struc so far

    // Loop on the result of fgets rather than feof so that a failed read
    // never causes the previous line to be processed a second time.
    while (fgets(line, kLineBufferSize, infile) != NULL) {
        if (strncmp(line, "TER", 3) == 0) {
            break;
        }
        if (!isBackboneAtomRecord(line, bb, bbLen)) {
            continue;
        }

        // A record cut off before the end of the residue-number field has
        // nothing usable in it; skip it instead of reading stale bytes.
        const int lineLen = static_cast<int>(strlen(line));
        if (lineLen < kResidueNumberOffset + kResidueNumberWidth) {
            continue;
        }

        strncpy(residueName, line + kResidueNameOffset, kResidueNameWidth);
        residueName[kResidueNameWidth] = '\0';
        Symbol* symbol = lookupSymbol(alphabet, residueName);
        if (symbol == 0) {
            continue;
        }

        // NOTE(review): only 3 characters starting at offset 23 are read, so
        // a 4-digit residue number loses its leading digit. That can only
        // produce extra neighbor warnings below -- confirm whether intended.
        const int pdbIndexOld = pdbIndex;
        strncpy(residueNumber, line + kResidueNumberOffset, kResidueNumberWidth);
        residueNumber[kResidueNumberWidth] = '\0';
        pdbIndex = charToInt(residueNumber);

        Coordinate3D* tempCoord = readCoordinate3DFromFile(line);

        if (tempCoord == 0) {
            fprintf(fpError,"WARNING: In file %s, could not read element %d\n",filename,pdbIndex);
        }
        else if (elementCount > 0 && pdbIndex - pdbIndexOld != 1) {
            // Numbering gap or repeat: decide by distance to the previous
            // element whether this is a new residue or a duplicate.
            fprintf(fpError,"elementCount: %d\n",elementCount);
            fprintf(fpError,"WARNING: In file %s, CA neighbors %d and %d\n",filename,pdbIndexOld,pdbIndex);

            Coordinate3D* tempCoordOld = struc->getCoordinate(elementCount-1);
            float distance = tempCoord->getDistanceTo(tempCoordOld);
            if (distance > 3.0) {
                fprintf(fpError,"CA distance = %1.3f > 3.000, so assuming consecutive residues\n",distance);
                struc->addElement(symbol,tempCoord);
                elementCount++;
            }
            else {
                fprintf(fpError,"CA distance = %1.3f < 3.000, so assuming duplicate residues\n",distance);
            }
        }
        else {
            struc->addElement(symbol,tempCoord);
            elementCount++;
        }

        // NOTE(review): as in the original, the coordinate is released after
        // addElement, so Structure is assumed to keep its own copy -- confirm.
        delete tempCoord;
    }

    fclose(infile);
    if (ownsErrorLog) {
        fclose(fpError);
    }
    delete[] bb;

    return struc;
}

// getNextStructure
//   Not implemented; always returns 0.
Structure* PDBReader::getNextStructure()
{
    return 0;
}

// getElementCount
//   Count the backbone ATOM/HETATM records in the file named by fullName,
//   stopping at the first "TER" record.
//   Returns 0 if no file has been accepted yet or it cannot be opened.
int PDBReader::getElementCount()
{
    if (fullName == 0) {
        return 0;
    }
    FILE* infile = fopen(fullName, "r");
    if (infile == NULL) {
        return 0;
    }

    // NOTE(review): assumed to be a new[]-allocated copy, as the original
    // code released it after use -- confirm against Alphabet::getBackbone.
    char* bb = alphabet->getBackbone();
    const int bbLen = static_cast<int>(strlen(bb));

    char line[kLineBufferSize];
    int count = 0;

    // Looping on fgets also counts a final line with no trailing newline.
    while (fgets(line, kLineBufferSize, infile) != NULL) {
        if (strncmp(line, "TER", 3) == 0) {
            break;
        }
        if (isBackboneAtomRecord(line, bb, bbLen)) {
            ++count;
        }
    }

    fclose(infile);
    delete[] bb;

    return count;
}

// readCoordinate3DFromFile
//   Parse the X, Y and Z fields (8 characters each, starting at offsets 30,
//   38 and 46) of one record line into a new Coordinate3D.
//   Returns a coordinate owned by the caller, or 0 if str is null or too
//   short to contain the start of the Z field.
Coordinate3D* PDBReader::readCoordinate3DFromFile(char* str)
{
    if (str == 0 || static_cast<int>(strlen(str)) <= kZOffset) {
        return 0;
    }

    const int offsets[3] = { kXOffset, kYOffset, kZOffset };
    float xyz[3];
    char field[kCoordinateWidth + 1];

    for (int axis = 0; axis < 3; ++axis) {
        // strncpy stops at the terminator, so a short last field is safe.
        strncpy(field, str + offsets[axis], kCoordinateWidth);
        field[kCoordinateWidth] = '\0';
        xyz[axis] = charToFloat(field);
    }

    return new Coordinate3D(xyz[0], xyz[1], xyz[2]);
}

// checkFullName
//   Make sure that fullName corresponds to an accessible file that
//   contains at least one ATOM/HETATM record.
//   If fullName is not set yet it is built from path + filename (or from
//   filename alone when no path is set). On failure fullName is left at 0.
//   Returns 1 if the file is usable, 0 otherwise.
int PDBReader::checkFullName()
{
    if (filename == 0) {
        return 0;
    }

    if (fullName == 0) {
        if (path == 0) {
            // No path: validate the bare filename before adopting it.
            if (checkPdbFile(filename) == 0) {
                return 0;
            }
            fullName = copyString(filename, 0);
            return 1;
        }

        // Build path + filename; no separator is inserted between them.
        const int pathLen = static_cast<int>(strlen(path));
        const int filenameLen = static_cast<int>(strlen(filename));
        fullName = new char[pathLen + filenameLen + 1];
        strncpy(fullName, path, pathLen);
        fullName[pathLen] = '\0';
        strncat(fullName, filename, filenameLen);
    }

    // Either freshly built or left over from an earlier call.
    if (checkPdbFile(fullName) == 0) {
        delete[] fullName;
        fullName = 0;
        return 0;
    }
    return 1;
}

// checkPdbFile
//   Returns 1 if `file` can be opened for reading and contains at least one
//   line starting with ATOM or HETATM; 0 otherwise (including a null name).
int PDBReader::checkPdbFile(char* file)
{
    if (file == 0) {
        return 0;
    }

    FILE* infile = fopen(file, "r");
    if (infile == NULL) {
        // Nothing was opened, so there is nothing to close.
        return 0;
    }

    char line[kLineBufferSize];
    int found = 0;
    while (found == 0 && fgets(line, kLineBufferSize, infile) != NULL) {
        if (isAtomRecord(line)) {
            found = 1;
        }
    }

    fclose(infile);
    return found;
}

// setNameFromFilename
//   Set the structure name from the filename minus any trailing
//   ".pdb" or ".ent" suffixes (repeated suffixes are all removed).
//   Returns 1 on success. Returns 0 if no filename is set (structure name
//   untouched) or if nothing remains after removing the suffixes (structure
//   name left unset).
int PDBReader::setNameFromFilename()
{
    if (filename == 0) {
        return 0;
    }

    // Reset the pointer as well, so that an early return below cannot leave
    // it dangling for the destructor to delete a second time.
    delete[] structureName;
    structureName = 0;

    int nameLen = static_cast<int>(strlen(filename));

    // Only compare while a whole suffix still fits, so the comparison never
    // starts before the beginning of the buffer.
    while (nameLen >= kSuffixLength
           && (strncmp(filename + nameLen - kSuffixLength, ".pdb", kSuffixLength) == 0
               || strncmp(filename + nameLen - kSuffixLength, ".ent", kSuffixLength) == 0)) {
        nameLen -= kSuffixLength;
    }

    if (nameLen <= 0) {
        return 0;
    }

    structureName = new char[nameLen + 1];
    strncpy(structureName, filename, nameLen);
    structureName[nameLen] = '\0';
    return 1;
}