|
前几天看到一篇文章,介绍了如何用 Java 的 API 操作 HDFS 文件系统。那篇文章单独介绍了每个 API 的使用,为了以后方便使用,我把它们封装成了一个类,现分享给大家。
文件操作类:
比较简单,大家也可以自己完善。
import java.io.IOException;import java.util.ArrayList;import java.util.List;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.BlockLocation;import org.apache.hadoop.fs.FSDataInputStream;import org.apache.hadoop.fs.FSDataOutputStream;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hdfs.DistributedFileSystem;import org.apache.hadoop.hdfs.protocol.DatanodeInfo;import org.apache.hadoop.io.IOUtils;public class HDFS_File {//read the file from HDFSpublic void ReadFile(Configuration conf, String FileName){try{FileSystem hdfs = FileSystem.get(conf);FSDataInputStream dis = hdfs.open(new Path(FileName));IOUtils.copyBytes(dis, System.out, 4096, false); dis.close();}catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}//copy the file from HDFS to localpublic void GetFile(Configuration conf, String srcFile, String dstFile){try {FileSystem hdfs = FileSystem.get(conf);Path srcPath = new Path(srcFile);Path dstPath = new Path(dstFile);hdfs.copyToLocalFile(true,srcPath, dstPath);}catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}//copy the local file to HDFSpublic void PutFile(Configuration conf, String srcFile, String dstFile){try {FileSystem hdfs = FileSystem.get(conf);Path srcPath = new Path(srcFile);Path dstPath = new Path(dstFile);hdfs.copyFromLocalFile(srcPath, dstPath);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}//create the new filepublic FSDataOutputStream CreateFile(Configuration conf, String FileName){try {Configuration config = new Configuration();FileSystem hdfs = FileSystem.get(config);Path path = new Path(FileName);FSDataOutputStream outputStream = hdfs.create(path);return outputStream;} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return null;}//rename the file namepublic boolean ReNameFile(Configuration conf, String srcName, String 
dstName){try {Configuration config = new Configuration();FileSystem hdfs = FileSystem.get(config);Path fromPath = new Path(srcName);Path toPath = new Path(dstName);boolean isRenamed = hdfs.rename(fromPath, toPath);return isRenamed;}catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return false;}//delete the file// tyep = true, delete the directory// type = false, delece the filepublic boolean DelFile(Configuration conf, String FileName, boolean type){try {Configuration config = new Configuration();FileSystem hdfs = FileSystem.get(config);Path path = new Path(FileName);boolean isDeleted = hdfs.delete(path, type);return isDeleted;}catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return false;}//Get HDFS file last modification timepublic long GetFileModTime(Configuration conf, String FileName){try{Configuration config = new Configuration();FileSystem hdfs = FileSystem.get(config);Path path = new Path(FileName);FileStatus fileStatus = hdfs.getFileStatus(path);long modificationTime = fileStatus.getModificationTime();return modificationTime;}catch(IOException e){e.printStackTrace();}return 0;}//checke if a file exists in HDFSpublic boolean CheckFileExist(Configuration conf, String FileName){try{Configuration config = new Configuration();FileSystem hdfs = FileSystem.get(config);Path path = new Path(FileName);boolean isExists = hdfs.exists(path);return isExists;}catch(IOException e){e.printStackTrace();}return false;}//Get the locations of a file in the HDFS clusterpublic List<String []> GetFileBolckHost(Configuration conf, String FileName){try{List<String []> list = new ArrayList<String []>();Configuration config = new Configuration();FileSystem hdfs = FileSystem.get(config);Path path = new Path(FileName);FileStatus fileStatus = hdfs.getFileStatus(path);BlockLocation[] blkLocations = hdfs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());int blkCount = blkLocations.length;for (int i=0; i < blkCount; i++) 
{String[] hosts = blkLocations.getHosts();list.add(hosts);}return list;}catch(IOException e){e.printStackTrace();}return null;}//Get a list of all the nodes host names in the HDFS clusterpublic String[] GetAllNodeName(Configuration conf){try{Configuration config = new Configuration();FileSystem fs = FileSystem.get(config);DistributedFileSystem hdfs = (DistributedFileSystem) fs;DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();String[] names = new String[dataNodeStats.length];for (int i = 0; i < dataNodeStats.length; i++) {names = dataNodeStats.getHostName();}return names;}catch(IOException e){e.printStackTrace();}return null;}}
自己写的测试程序:
import java.io.IOException;import java.util.Date;import java.util.List;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FSDataOutputStream;public class File_Operation {public static void main(String args[]){Configuration conf = new Configuration();HDFS_File file = new HDFS_File();//print all the node nameString[] host_name = file.GetAllNodeName(conf);for (int i = 0; i<host_name.length; i++){System.out.println("the host name:"+host_name);}//create the fileString File_Name = "my_test";FSDataOutputStream fs = file.CreateFile(conf, File_Name);if (fs != null){try {fs.close();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}System.out.println(File_Name+"create OK");}else{System.out.println(File_Name+"create fail");}//check the file existsif (file.CheckFileExist(conf, File_Name) == true){System.out.println(File_Name+"the file exists");}else{System.out.println(File_Name+"the file not exists");}//delete the fileif (file.DelFile(conf, File_Name, false) == true){System.out.println(File_Name+"the file delete");}else{System.out.println(File_Name+"the file not delete");}//copy the file to HDFSString srcFile = "/home/jackydai/my";String ToFile = "/home/jackydai/my11";String dstFile = "/user/jackydai/my";file.PutFile(conf, srcFile, dstFile);System.out.println("copy file ok!");//check the file last modfiy timelong mod_time = file.GetFileModTime(conf, dstFile);Date d = new Date(mod_time);System.out.println("the modefile time"+d);//get the locations of a file in HDFSList<String []> list = file.GetFileBolckHost(conf, dstFile);for (int i = 0; i < list.size(); i++){for(int j = 0; j < list.get(i).length; j++){System.out.println("the bolck host name:"+list.get(i)[j]);}}System.out.println("host name over!");//read the filefile.ReadFile(conf, dstFile);System.out.println("read over!");//copy the file to localfile.GetFile(conf, dstFile, ToFile);System.out.println("copy ok");}} |
|
|