package com.twitter.elephantbird.mapreduce.input;

import com.google.common.collect.Lists;
import com.hadoop.compression.lzo.LzoIndex;
import com.twitter.elephantbird.util.HadoopCompat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/elephant-bird-core-4.3.jar:com/twitter/elephantbird/mapreduce/input/LzoInputFormat.class */
/**
 * Base input format for LZO-compressed files.
 *
 * <p>On top of {@link FileInputFormat} this class:
 * <ul>
 *   <li>keeps only input files whose name ends in {@code .lzo} and does not start with
 *       {@code .} or {@code _}, optionally descending into directories;</li>
 *   <li>reports a file as splittable only when a sibling index file exists;</li>
 *   <li>re-aligns the splits computed by the superclass using the file's {@link LzoIndex}.</li>
 * </ul>
 *
 * @param <K> key type produced by the record reader
 * @param <V> value type produced by the record reader
 */
public abstract class LzoInputFormat<K, V> extends FileInputFormat<K, V> {
    private static final Logger LOG = LoggerFactory.getLogger(LzoInputFormat.class);

    /** Configuration key that turns on recursive traversal of input directories. */
    private static final String RECURSIVE_INPUT_KEY = "mapred.input.dir.recursive";

    /** File-name suffix an input file must carry to be accepted. */
    private static final String LZO_FILE_SUFFIX = ".lzo";

    /** Accepts every path whose name does not start with {@code .} or {@code _}. */
    private static final PathFilter HIDDEN_PATH_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith(".") && !name.startsWith("_");
        }
    };

    /** Accepts non-hidden paths whose name ends in {@code .lzo}. */
    private static final PathFilter VISIBLE_LZO_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return HIDDEN_PATH_FILTER.accept(path) && path.getName().endsWith(LZO_FILE_SUFFIX);
        }
    };

    /**
     * Lists the input files of the job, keeping only visible {@code .lzo} files.
     *
     * <p>Directories returned by the superclass are expanded only when
     * {@code mapred.input.dir.recursive} is {@code true}; otherwise they are dropped.
     *
     * @param jobContext the job whose inputs are listed
     * @return the accepted input files
     * @throws IOException if listing the inputs fails
     */
    @Override
    protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
        List<FileStatus> candidates = super.listStatus(jobContext);
        List<FileStatus> lzoFiles = Lists.newArrayList();
        boolean recursive =
                HadoopCompat.getConfiguration(jobContext).getBoolean(RECURSIVE_INPUT_KEY, false);

        for (FileStatus candidate : candidates) {
            // Inputs may live on different file systems, so resolve one per path.
            addInputPath(lzoFiles, fileSystemFor(candidate.getPath(), jobContext), candidate, recursive);
        }

        LOG.debug("Total lzo input paths to process : {}", lzoFiles.size());
        return lzoFiles;
    }

    /**
     * Adds {@code status} to {@code results} if it is a visible {@code .lzo} file, or walks it
     * when it is a directory and {@code recursive} is set.
     *
     * @param results   accumulator for accepted files
     * @param fs        file system used to list directory contents
     * @param status    file or directory to examine
     * @param recursive whether directories are descended into; when {@code false} they are ignored
     * @throws IOException if listing a directory fails
     */
    protected void addInputPath(List<FileStatus> results, FileSystem fs, FileStatus status, boolean recursive)
            throws IOException {
        Path path = status.getPath();
        if (!status.isDir()) {
            if (VISIBLE_LZO_FILTER.accept(path)) {
                results.add(status);
            }
            return;
        }
        if (!recursive) {
            return;
        }
        // Hidden children are filtered here; the .lzo suffix check is applied to files only,
        // so sub-directories with arbitrary names are still traversed.
        for (FileStatus child : fs.listStatus(path, HIDDEN_PATH_FILTER)) {
            addInputPath(results, fs, child, recursive);
        }
    }

    /**
     * Reports a file as splittable when a path with the LZO index suffix appended exists.
     *
     * @param jobContext the job being planned
     * @param path       the input file
     * @return {@code true} if the index file exists
     * @throws RuntimeException wrapping the {@link IOException} if the existence check fails
     */
    @Override
    protected boolean isSplitable(JobContext jobContext, Path path) {
        try {
            return fileSystemFor(path, jobContext).exists(path.suffix(LzoIndex.LZO_INDEX_SUFFIX));
        } catch (IOException e) {
            throw new RuntimeException("Unable to check for LZO index of " + path, e);
        }
    }

    /**
     * Computes the default splits and re-aligns each one using the file's LZO index.
     *
     * <p>Splits of files whose index is empty are passed through unchanged. For indexed files,
     * a split is replaced by one spanning the aligned start and end offsets; a split for which
     * either aligned offset is {@link LzoIndex#NOT_FOUND} is dropped.
     *
     * @param jobContext the job being planned
     * @return the aligned splits
     * @throws IOException if a file's index cannot be read, or {@code LzoIndex.readIndex}
     *                     returns {@code null} for it
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        List<InputSplit> defaultSplits = super.getSplits(jobContext);
        List<InputSplit> alignedSplits = new ArrayList<InputSplit>(defaultSplits.size());

        // Per-file state, reused across consecutive splits of the same file so the index and
        // the file length are fetched once per file rather than once per split.
        Path currentFile = null;
        LzoIndex currentIndex = null;
        long currentFileLength = 0L;

        for (InputSplit genericSplit : defaultSplits) {
            FileSplit fileSplit = (FileSplit) genericSplit;
            Path file = fileSplit.getPath();

            if (!file.equals(currentFile)) {
                FileSystem fs = fileSystemFor(file, jobContext);
                currentIndex = LzoIndex.readIndex(fs, file);
                currentFile = file;
                // The length is only needed to align split ends of indexed files.
                currentFileLength = (currentIndex != null && !currentIndex.isEmpty())
                        ? fs.getFileStatus(file).getLen()
                        : 0L;
            }

            if (currentIndex == null) {
                throw new IOException("Index not found for " + file);
            }

            if (currentIndex.isEmpty()) {
                // No index entries: keep the split exactly as the superclass produced it.
                alignedSplits.add(fileSplit);
                continue;
            }

            long start = fileSplit.getStart();
            long end = start + fileSplit.getLength();
            long lzoStart = currentIndex.alignSliceStartToIndex(start, end);
            long lzoEnd = currentIndex.alignSliceEndToIndex(end, currentFileLength);

            if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
                long lzoLength = lzoEnd - lzoStart;
                alignedSplits.add(new FileSplit(file, lzoStart, lzoLength, fileSplit.getLocations()));
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Added LZO split for " + file + "[start=" + lzoStart + ", length=" + lzoLength + "]");
                }
            }
        }
        return alignedSplits;
    }

    /** Resolves the file system owning {@code path} using the job's configuration. */
    private static FileSystem fileSystemFor(Path path, JobContext jobContext) throws IOException {
        return path.getFileSystem(HadoopCompat.getConfiguration(jobContext));
    }
}
