使用Python去除C/C++源程序中的所有注释和空行
说明:使用Python的字符串处理和正则表达式处理,分别实现了删除C/C++源程序中所有注释和空行的小脚本。
使用字符串处理
# delete all the comments and empty line of a C/C++ source file
#
# String-processing version: every line is scanned character by character,
# so comment markers inside string/character literals are left alone and
# code that follows a closing "*/" on the same line is preserved.
# The code runs unchanged on Python 2.6+ and Python 3.
import os
import string
import sys

# File extensions that are treated as C/C++ sources.
SOURCE_SUFFIXES = ('.c', '.h', '.cpp', '.hh', '.cc')


#-------------------------------------------------------------
def usage():
    """Print a short command-line help text."""
    print(u'''
    help: del_comment.py <filename | dirname>
    ''')


#--------------------------------------------------------------
def _strip_comments_from_line(line, in_block):
    """Remove the comment parts of one source line.

    Args:
        line: one line of C/C++ source (with or without its newline).
        in_block: True when the line starts inside a ``/* ... */`` comment
            opened on an earlier line.

    Returns:
        A ``(code, in_block)`` tuple: the text of the line that is not part
        of a comment, and whether a block comment is still open at the end
        of the line.

    Limitations: literals are assumed not to span lines, and backslash
    line continuations of ``//`` comments are not followed.
    """
    kept = []
    pos = 0
    length = len(line)
    quote = None  # delimiter of the string/char literal we are inside
    while pos < length:
        if in_block:
            end = line.find('*/', pos)
            if end == -1:
                # The comment continues on the next line.
                break
            pos = end + 2
            in_block = False
            # A comment counts as one space in C, which keeps the tokens
            # on either side of it from being glued together.
            kept.append(' ')
            continue

        ch = line[pos]
        if quote:
            kept.append(ch)
            if ch == '\\' and pos + 1 < length:
                # Keep the escaped character so \" does not end the literal.
                kept.append(line[pos + 1])
                pos += 2
                continue
            if ch == quote:
                quote = None
            pos += 1
        elif ch == '"' or ch == "'":
            quote = ch
            kept.append(ch)
            pos += 1
        elif line.startswith('//', pos):
            # Line comment: the rest of the line is dropped.
            break
        elif line.startswith('/*', pos):
            in_block = True
            pos += 2
        else:
            kept.append(ch)
            pos += 1
    return ''.join(kept), in_block


#--------------------------------------------------------------
def deal_file(src):
    """Write a copy of *src* without comments and blank lines.

    The result is stored next to the input as ``<name>_no_comment<ext>``.

    Args:
        src: path of a C/C++ source file.

    Returns:
        True when the output file was written; False when *src* does not
        exist, is a symbolic link, has an unsupported extension, or could
        not be read/written.
    """
    # file exist or not
    if not os.path.exists(src):
        print('Error: file - %s doesn\'t exist.' % src)
        return False
    if os.path.islink(src):
        print('Error: file - %s is a link.' % src)
        return False
    # splitext returns (stem, extension); both parts are needed below.
    stem, filetype = os.path.splitext(src)
    if filetype not in SOURCE_SUFFIXES:
        return False
    try:
        if not os.access(src, os.W_OK):
            os.chmod(src, 0o664)
    except OSError:
        print('Error: you can not chang %s\'s mode.' % src)

    outputfilename = stem + '_no_comment' + filetype
    in_block = False
    try:
        with open(src, 'r') as inputf:
            with open(outputfilename, 'w') as outputf:
                for line in inputf:
                    code, in_block = _strip_comments_from_line(line, in_block)
                    code = code.rstrip()
                    if code:  # skip lines that are (now) empty
                        outputf.write(code + '\n')
    except (IOError, OSError, UnicodeError) as err:
        print('Error: failed to process %s: %s' % (src, err))
        return False

    if in_block:
        # End of file reached inside an unterminated /* ... comment.
        print('Match /*...*/ error')
    return True


#--------------------------------------------------------------
def deal_dir(src):
    """Process every supported source file below directory *src*.

    Sub-directories are visited recursively.

    Returns:
        False when *src* does not exist, True otherwise.
    """
    # dir exist or not
    if not os.path.exists(src):
        print('Error: dir - %s is not exist.' % src)
        return False
    for name in os.listdir(src):
        path = os.path.join(src, name)
        if os.path.isdir(path):
            deal_dir(path)
        elif os.path.isfile(path):
            deal_file(path)
    return True


#--------------------------------------------------------------
def main():
    """Command-line entry point: ``del_comment.py <filename | dirname>``."""
    if len(sys.argv) < 2:
        usage()
        sys.exit(1)
    src = sys.argv[1]

    # get absolute dir/file path, then deal
    if os.path.isdir(src):
        succeeded = deal_dir(os.path.abspath(src))
    elif os.path.isfile(src):
        succeeded = deal_file(os.path.abspath(src))
    else:
        print('File input error')
        sys.exit(1)

    if not succeeded:
        sys.exit(1)
    print('Successful handle file.')


#--------------------------------------------------------------
if __name__ == '__main__':
    main()
使用正则表达式
# delete all the comments of a C/C++ source file
#
# Regular-expression version. Blank lines are NOT removed by this variant.
# The code runs unchanged on Python 2.6+ and Python 3.
import glob
import os
import re
import string
import sys

# File extensions that are treated as C/C++ sources.
SOURCE_SUFFIXES = ('.c', '.h', '.cpp', '.hh', '.cc')

# /*..*/  //...
# String and character literals are matched as well, so that comment
# markers inside them (e.g. "http://x") are not mistaken for comments.
# (?s) lets "." cross newlines, which multi-line block comments need.
Rule1 = (
    r'(?s)'
    r'//[^\n]*'                  # line comment
    r'|/\*.*?\*/'                # block comment (non-greedy)
    r'|"(?:\\.|[^"\\\n])*"'      # string literal
    r"|'(?:\\.|[^'\\\n])*'"      # character literal
)
c1 = re.compile(Rule1)


#-------------------------------------------------------------
def usage():
    """Print a short command-line help text."""
    print(u'''
    help: del_comment.py <filename | dirname>
    ''')


#--------------------------------------------------------------
def _replace_match(match):
    """Return the replacement text for one ``c1`` match."""
    text = match.group(0)
    if text.startswith('//'):
        return ''
    if text.startswith('/*'):
        # A comment counts as one space in C, which keeps the tokens on
        # either side of it from being glued together.
        return ' '
    # String/character literal: keep it untouched.
    return text


#--------------------------------------------------------------
def deal_file(src):
    """Write a copy of *src* with all comments removed.

    The result is stored next to the input as ``<name>_no_comment<ext>``.

    Args:
        src: path of a C/C++ source file.

    Returns:
        True when the output file was written; False when *src* does not
        exist, is a symbolic link, has an unsupported extension, or could
        not be read/written.
    """
    # file exist or not
    if not os.path.exists(src):
        print('Error: file - %s doesn\'t exist.' % src)
        return False
    if os.path.islink(src):
        print('Error: file - %s is a link.' % src)
        return False
    # splitext returns (stem, extension); both parts are needed below.
    stem, filetype = os.path.splitext(src)
    if filetype not in SOURCE_SUFFIXES:
        return False
    try:
        if not os.access(src, os.W_OK):
            os.chmod(src, 0o664)
    except OSError:
        print('Error: you can not chang %s\'s mode.' % src)

    outputfilename = stem + '_no_comment' + filetype
    try:
        with open(src, 'r') as inputf:
            lines = inputf.read()
        with open(outputfilename, 'w') as outputf:
            outputf.write(c1.sub(_replace_match, lines))
    except (IOError, OSError, UnicodeError) as err:
        print('Error: failed to process %s: %s' % (src, err))
        return False
    return True


#--------------------------------------------------------------
def deal_dir(src):
    """Process every supported source file below directory *src*.

    Sub-directories are visited recursively.

    Returns:
        False when *src* does not exist, True otherwise.
    """
    # dir exist or not
    if not os.path.exists(src):
        print('Error: dir - %s is not exist.' % src)
        return False
    for name in os.listdir(src):
        path = os.path.join(src, name)
        if os.path.isdir(path):
            deal_dir(path)
        elif os.path.isfile(path):
            deal_file(path)
    return True


#--------------------------------------------------------------
def main():
    """Command-line entry point: ``del_comment.py <filename | dirname>``."""
    if len(sys.argv) < 2:
        usage()
        sys.exit(1)
    src = sys.argv[1]

    # get absolute dir/file path, then deal
    if os.path.isdir(src):
        succeeded = deal_dir(os.path.abspath(src))
    elif os.path.isfile(src):
        succeeded = deal_file(os.path.abspath(src))
    else:
        print('File input error')
        sys.exit(1)

    if not succeeded:
        sys.exit(1)
    print('Successful handle file.')


#--------------------------------------------------------------
if __name__ == '__main__':
    main()
使用示例
待处理文件:
1: #ifndef _RS232_H_ 2: #define _RS232_H_ 3: 4: /* the maximum number of ports we are willing to open */ 5: #define MAX_PORTS 4 6: 7: /*this array hold information about each port we have opened */ 8: struct PortInfo{ 9: int busy;10: char name;11: int handle;12: };13: 14: int OpenCom(int portNo,const char deviceName[],long baudRate);15: int CloseCom(int portNo);16: int ComRd(int portNo,char buf[],int maxCnt,int Timeout);17: int ComWrt(int portNo,const char * buf,int maxCnt);18: 19: //long GetBaudRate(long baudRate);20: //int OpenComConfig(int port,21: // const char deviceName[],22: // long baudRate,23: // int parity,24: // int dataBits,25: // int stopBits,26: // int iqSize,27: // int oqSize);28: 29: #endif 处理结果:
1: #ifndef _RS232_H_ 2: #define _RS232_H_ 3: #define MAX_PORTS 4 4: struct PortInfo{ 5: int busy; 6: char name; 7: int handle; 8: }; 9: int OpenCom(int portNo,const char deviceName[],long baudRate);10: int CloseCom(int portNo);11: int ComRd(int portNo,char buf[],int maxCnt,int Timeout);12: int ComWrt(int portNo,const char * buf,int maxCnt);13: #endif 在使用Python正则表达式处理的时候,有一个问题,就是没法删除文件中的空行。如果想删除空行,可能还得一行行读进来,把长度为0的行不保存。这是目前感觉到的难点。
页:
[1]