2013-05-08 20:59:06,467 FATAL org.apache.hadoop.mapred.Child: Error running child : java.lang.OutOfMemoryError: Java heap space
at java.nio.HeapByteBuffer.<init>(HeapByteBuffer.java:39)
at java.nio.ByteBuffer.allocate(ByteBuffer.java:312)
at sun.nio.cs.StreamDecoder.<init>(StreamDecoder.java:231)
at sun.nio.cs.StreamDecoder.<init>(StreamDecoder.java:211)
at sun.nio.cs.StreamDecoder.forInputStreamReader(StreamDecoder.java:50)
at java.io.InputStreamReader.<init>(InputStreamReader.java:57)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:211)
at org.apache.hadoop.util.Shell.run(Shell.java:182)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:375)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:461)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:444)
at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:710)
at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:443)
at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.getOwner(RawLocalFileSystem.java:426)
at org.apache.hadoop.mapred.TaskLog.obtainLogDirOwner(TaskLog.java:267)
at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:124)
at org.apache.hadoop.mapred.Child$4.run(Child.java:260)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
节点二
2013-05-08 20:56:37,012 INFO org.apache.hadoop.mapred.Task: Communication exception: java.lang.OutOfMemoryError: Java heap space
at java.io.BufferedReader.<init>(BufferedReader.java:80)
at java.io.BufferedReader.<init>(BufferedReader.java:91)
at org.apache.hadoop.util.ProcfsBasedProcessTree.constructProcessInfo(ProcfsBasedProcessTree.java:396)
at org.apache.hadoop.util.ProcfsBasedProcessTree.getProcessTree(ProcfsBasedProcessTree.java:151)
at org.apache.hadoop.util.LinuxResourceCalculatorPlugin.getProcResourceValues(LinuxResourceCalculatorPlugin.java:401)
at org.apache.hadoop.mapred.Task.updateResourceCounters(Task.java:808)
at org.apache.hadoop.mapred.Task.updateCounters(Task.java:830)
at org.apache.hadoop.mapred.Task.access$600(Task.java:66)
at org.apache.hadoop.mapred.Task$TaskReporter.run(Task.java:666)
at java.lang.Thread.run(Thread.java:662)
节点三
2013-05-08 21:02:26,489 FATAL org.apache.hadoop.mapred.Child: Error running child : java.lang.OutOfMemoryError: Java heap space
at com.sun.org.apache.xerces.internal.xni.XMLString.toString(XMLString.java:185)
at com.sun.org.apache.xerces.internal.parsers.AbstractDOMParser.characters(AbstractDOMParser.java:1185)
at com.sun.org.apache.xerces.internal.xinclude.XIncludeHandler.characters(XIncludeHandler.java:1085)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:464)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:808)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:737)
at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:119)
at com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(DOMParser.java:232)
at com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:284)
at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:180)
at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1168)
at org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:1119)
at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:1063)
at org.apache.hadoop.conf.Configuration.get(Configuration.java:416)
at org.apache.hadoop.conf.Configuration.getLong(Configuration.java:521)
at org.apache.hadoop.io.nativeio.NativeIO.ensureInitialized(NativeIO.java:120)
at org.apache.hadoop.io.nativeio.NativeIO.getOwner(NativeIO.java:103)
at org.apache.hadoop.io.SecureIOUtils.openForRead(SecureIOUtils.java:116)
at org.apache.hadoop.mapred.TaskLog.getAllLogsFileDetails(TaskLog.java:191)
at org.apache.hadoop.mapred.TaskLogsTruncater.getAllLogsFileDetails(TaskLogsTruncater.java:342)
at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:134)
at org.apache.hadoop.mapred.Child$4.run(Child.java:260)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
2013-05-08 20:58:47,568 ERROR org.apache.hadoop.security.UserGroupInformation: PriviledgedActionException as:hadoop cause:org.apache.hadoop.io.SecureIOUtils$AlreadyExistsException: EEXIST: File exists
2013-05-08 20:58:47,569 WARN org.apache.hadoop.mapred.Child: Error running child
org.apache.hadoop.io.SecureIOUtils$AlreadyExistsException: EEXIST: File exists
at org.apache.hadoop.io.SecureIOUtils.createForWrite(SecureIOUtils.java:167)
at org.apache.hadoop.mapred.TaskLog.writeToIndexFile(TaskLog.java:312)
at org.apache.hadoop.mapred.TaskLog.syncLogs(TaskLog.java:385)
at org.apache.hadoop.mapred.Child$4.run(Child.java:257)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: EEXIST: File exists
at org.apache.hadoop.io.nativeio.NativeIO.open(Native Method)
at org.apache.hadoop.io.SecureIOUtils.createForWrite(SecureIOUtils.java:161)
... 7 more
最后这个log与前面的OOM其实是殊途同归的:每个map在OOM之后,tasktracker会尝试重新启动该map,并重新创建相同的TaskLog文件句柄。但由于上一次是异常终止的,之前的TaskLog文件已经被创建,而OOM时这个句柄没有正常关闭;createForWrite又要求目标文件必须不存在,所以会抛出EEXIST(File exists)的写失败异常。源码如下:
/**
* Open the specified File for write access, ensuring that it does not exist.
* @param f the file that we want to create
* @param permissions we want to have on the file (if security is enabled)
*
* @throws AlreadyExistsException if the file already exists
* @throws IOException if any other error occurred
*/
public static FileOutputStream createForWrite(File f, int permissions)