/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer;

import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.indexer.IndexerMapReduce;
import org.apache.nutch.segment.SegmentChecker;
import org.apache.nutch.util.HadoopFSUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.NutchTool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line and programmatic entry point that indexes segments using the
 * configured indexer plugins by running a Hadoop MapReduce job.
 *
 * <p>The job is configured through {@link IndexerMapReduce#initMRJob} and a
 * handful of {@code indexer.*} configuration properties set from the
 * arguments of {@link #index(Path, Path, List, boolean, boolean, String,
 * boolean, boolean, boolean, boolean)}.
 */
public class IndexingJob extends NutchTool implements Tool {
    /** Source of the random suffix for the temporary job output directory. */
    private static final Random RANDOM = new Random();
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Creates an indexing job without a configuration. */
    public IndexingJob() {
        super(null);
    }

    /**
     * Creates an indexing job using the given configuration.
     *
     * @param conf configuration passed to the superclass
     */
    public IndexingJob(Configuration conf) {
        super(conf);
    }

    /**
     * Indexes the segments without deleting gone documents and with all other
     * options disabled.
     *
     * @see #index(Path, Path, List, boolean, boolean, String, boolean, boolean, boolean, boolean)
     */
    public void index(Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit) throws IOException, InterruptedException, ClassNotFoundException {
        index(crawlDb, linkDb, segments, noCommit, false, null);
    }

    /**
     * Indexes the segments without additional indexer parameters.
     *
     * @see #index(Path, Path, List, boolean, boolean, String, boolean, boolean, boolean, boolean)
     */
    public void index(Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit, boolean deleteGone) throws IOException, InterruptedException, ClassNotFoundException {
        index(crawlDb, linkDb, segments, noCommit, deleteGone, null);
    }

    /**
     * Indexes the segments with URL filtering and normalizing disabled.
     *
     * @see #index(Path, Path, List, boolean, boolean, String, boolean, boolean, boolean, boolean)
     */
    public void index(Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit, boolean deleteGone, String params) throws IOException, InterruptedException, ClassNotFoundException {
        index(crawlDb, linkDb, segments, noCommit, deleteGone, params, false, false);
    }

    /**
     * Indexes the segments without adding binary content.
     *
     * @see #index(Path, Path, List, boolean, boolean, String, boolean, boolean, boolean, boolean)
     */
    public void index(Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit, boolean deleteGone, String params, boolean filter, boolean normalize) throws IOException, InterruptedException, ClassNotFoundException {
        // Forward the caller's flags; they used to be replaced by hard-coded false.
        index(crawlDb, linkDb, segments, noCommit, deleteGone, params, filter, normalize, false);
    }

    /**
     * Indexes the segments; binary content, if added, is not Base64 encoded.
     *
     * @see #index(Path, Path, List, boolean, boolean, String, boolean, boolean, boolean, boolean)
     */
    public void index(Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit, boolean deleteGone, String params, boolean filter, boolean normalize, boolean addBinaryContent) throws IOException, InterruptedException, ClassNotFoundException {
        // Forward the caller's flags; they used to be replaced by hard-coded false.
        index(crawlDb, linkDb, segments, noCommit, deleteGone, params, filter, normalize, addBinaryContent, false);
    }

    /**
     * Configures and runs the indexing MapReduce job and logs the counters of
     * the {@code IndexerStatus} group once it has finished.
     *
     * @param crawlDb path to the CrawlDb (the command line allows omitting it)
     * @param linkDb path to the LinkDb, may be {@code null} on the command-line path
     * @param segments segments to index
     * @param noCommit stored in {@code indexer.nocommit}
     * @param deleteGone stored in {@code indexer.delete}
     * @param params stored in {@code indexer.additional.params} unless {@code null}
     * @param filter stored in {@code indexer.url.filters}
     * @param normalize stored in {@code indexer.url.normalizers}
     * @param addBinaryContent passed on to {@link IndexerMapReduce#initMRJob}
     * @param base64 stored in {@code indexer.binary.base64}
     * @throws IOException if the job or the removal of its temporary output fails
     * @throws InterruptedException if waiting for the job is interrupted
     * @throws ClassNotFoundException if thrown while running the job
     * @throws RuntimeException if the job completes unsuccessfully
     */
    public void index(Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit, boolean deleteGone, String params, boolean filter, boolean normalize, boolean addBinaryContent, boolean base64) throws IOException, InterruptedException, ClassNotFoundException {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        LOG.info("Indexer: starting");
        Job job = Job.getInstance(getConf(), "Nutch IndexingJob: crawldb: " + crawlDb + " segment(s): " + segments);
        job.setJobName("Indexer");
        Configuration conf = job.getConfiguration();
        LOG.info("Indexer: deleting gone documents: {}", deleteGone);
        LOG.info("Indexer: URL filtering: {}", filter);
        LOG.info("Indexer: URL normalizing: {}", normalize);
        if (addBinaryContent) {
            if (base64) {
                LOG.info("Indexer: adding binary content as Base64");
            } else {
                LOG.info("Indexer: adding binary content");
            }
        }
        IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job, addBinaryContent);
        conf.setBoolean("indexer.delete", deleteGone);
        conf.setBoolean("indexer.url.filters", filter);
        conf.setBoolean("indexer.url.normalizers", normalize);
        conf.setBoolean("indexer.binary.base64", base64);
        conf.setBoolean("indexer.nocommit", noCommit);
        if (params != null) {
            conf.set("indexer.additional.params", params);
        }
        job.setReduceSpeculativeExecution(false);
        // Uniquely named output directory; it is removed again in the finally block.
        Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + RANDOM.nextInt());
        FileOutputFormat.setOutputPath(job, tmp);
        try {
            try {
                boolean success = job.waitForCompletion(true);
                if (!success) {
                    String message = NutchJob.getJobFailureLogMessage("Indexing", job);
                    LOG.error(message);
                    throw new RuntimeException(message);
                }
            } catch (IOException | ClassNotFoundException | InterruptedException e) {
                // Only failures of the job run itself are logged here before being rethrown.
                LOG.error(StringUtils.stringifyException(e));
                throw e;
            }
            LOG.info("Indexer: number of documents indexed, deleted, or skipped:");
            for (Counter counter : job.getCounters().getGroup("IndexerStatus")) {
                LOG.info("Indexer: {}  {}", String.format(Locale.ROOT, "%6d", counter.getValue()), counter.getName());
            }
            stopWatch.stop();
            LOG.info("Indexer: finished, elapsed: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS));
        } finally {
            tmp.getFileSystem(conf).delete(tmp, true);
        }
    }

    /** Prints the command-line help to standard error. */
    private static void usage() {
        System.err.println("Usage: Indexer (<crawldb> | -nocrawldb) (<segment> ... | -dir <segments>) [general options]");
        System.err.println("");
        System.err.println("Index given segments using configured indexer plugins");
        System.err.println("");
        System.err.println("The CrawlDb is optional but it is required to send deletion requests for duplicates");
        System.err.println("and to read the proper document score/boost/weight passed to the indexers.");
        System.err.println("");
        System.err.println("Required arguments:");
        System.err.println("");
        System.err.println("\t<crawldb>\tpath to CrawlDb, or");
        System.err.println("\t-nocrawldb\tflag to indicate that no CrawlDb shall be used");
        System.err.println("");
        System.err.println("\t<segment> ...\tpath(s) to segment, or");
        System.err.println("\t-dir <segments>\tpath to segments/ directory,");
        System.err.println("\t               \t(all subdirectories are read as segments)");
        System.err.println("");
        System.err.println("General options:");
        System.err.println("\t");
        System.err.println("\t-linkdb <linkdb>\tuse LinkDb to index anchor texts of incoming links");
        System.err.println("\t-params k1=v1&k2=v2...\tparameters passed to indexer plugins");
        System.err.println("\t                      \t(via property indexer.additional.params)");
        System.err.println("");
        System.err.println("\t-noCommit\tdo not call the commit method of indexer plugins");
        System.err.println("\t-deleteGone\tsend deletion requests for 404s, redirects, duplicates");
        System.err.println("\t-filter   \tskip documents with URL rejected by configured URL filters");
        System.err.println("\t-normalize\tnormalize URLs before indexing");
        System.err.println("\t-addBinaryContent\tindex raw/binary content in field `binaryContent`");
        System.err.println("\t-base64   \tuse Base64 encoding for binary content");
        System.err.println("");
    }

    /**
     * Parses the command-line arguments and runs the indexing job.
     *
     * <p>The first positional argument is taken as the CrawlDb unless
     * {@code -nocrawldb} was seen before it; every further positional argument
     * is a segment path, which is silently skipped if
     * {@link SegmentChecker#isIndexable} rejects it.
     *
     * @param args command-line arguments, see {@link #usage()}
     * @return 0 on success, -1 on invalid arguments or if indexing failed
     * @throws Exception if accessing the file system while resolving segments fails
     */
    public int run(String[] args) throws Exception {
        if (args.length == 0) {
            usage();
            return -1;
        }
        Path crawlDb = null;
        boolean noCrawlDb = false;
        Path linkDb = null;
        List<Path> segments = new ArrayList<>();
        String params = null;
        boolean noCommit = false;
        boolean deleteGone = false;
        boolean filter = false;
        boolean normalize = false;
        boolean addBinaryContent = false;
        boolean base64 = false;
        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            boolean takesValue = "-linkdb".equals(arg) || "-dir".equals(arg) || "-params".equals(arg);
            if (takesValue && i + 1 >= args.length) {
                // A trailing option without its value would otherwise run past the array.
                usage();
                System.err.println("Missing value for option " + arg);
                return -1;
            }
            switch (arg) {
                case "-nocrawldb":
                    noCrawlDb = true;
                    break;
                case "-linkdb":
                    linkDb = new Path(args[++i]);
                    break;
                case "-dir":
                    addIndexableSegments(new Path(args[++i]), segments);
                    break;
                case "-noCommit":
                    noCommit = true;
                    break;
                case "-deleteGone":
                    deleteGone = true;
                    break;
                case "-filter":
                    filter = true;
                    break;
                case "-normalize":
                    normalize = true;
                    break;
                case "-addBinaryContent":
                    addBinaryContent = true;
                    break;
                case "-base64":
                    base64 = true;
                    break;
                case "-params":
                    params = args[++i];
                    break;
                default:
                    if (crawlDb == null && !noCrawlDb) {
                        crawlDb = new Path(arg);
                    } else {
                        Path segment = new Path(arg);
                        FileSystem fs = segment.getFileSystem(getConf());
                        if (SegmentChecker.isIndexable(segment, fs)) {
                            segments.add(segment);
                        }
                    }
                    break;
            }
        }
        if (segments.isEmpty()) {
            usage();
            System.err.println("No indexable segments passed as arguments. At least one segment is required!");
            return -1;
        }
        try {
            index(crawlDb, linkDb, segments, noCommit, deleteGone, params, filter, normalize, addBinaryContent, base64);
            return 0;
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt visible to the caller although the exception is consumed here.
                Thread.currentThread().interrupt();
            }
            LOG.error("Indexer: {}", StringUtils.stringifyException(e));
            return -1;
        }
    }

    /**
     * Runs the tool through {@link ToolRunner} and exits the JVM with its
     * return code.
     *
     * @param args command-line arguments
     * @throws Exception if thrown by {@link ToolRunner#run}
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(NutchConfiguration.create(), new IndexingJob(), args);
        System.exit(res);
    }

    /**
     * Runs the indexing job from a map of named arguments.
     *
     * <p>Recognized keys: {@code crawldb} and {@code linkdb} (default to
     * {@code <crawlId>/crawldb} and {@code <crawlId>/linkdb}),
     * {@code segment_dir} (directory whose indexable subdirectories are used),
     * {@code segment} (a list of paths, or a single path), {@code params}, and
     * the flags {@code noCommit}, {@code deleteGone}, {@code normalize},
     * {@code filter}, {@code addBinaryContent} and {@code base64}. A flag is
     * enabled by the mere presence of its key; {@code base64} is only honored
     * together with {@code addBinaryContent}.
     *
     * <p>If {@code segment_dir} is absent, the most recently modified entry of
     * the local directory {@code <crawlId>/segments} is added as a segment.
     *
     * @param args named arguments as described above
     * @param crawlId prefix used to build the default paths
     * @return a map containing the key {@code result} with value 0
     * @throws Exception if resolving the segments or the indexing job fails
     */
    @Override
    public Map<String, Object> run(Map<String, Object> args, String crawlId) throws Exception {
        Configuration conf = getConf();

        Path crawlDb = args.containsKey("crawldb") ? toPath(args.get("crawldb")) : new Path(crawlId + "/crawldb");
        Path linkdb = args.containsKey("linkdb") ? toPath(args.get("linkdb")) : new Path(crawlId + "/linkdb");

        List<Path> segments = new ArrayList<>();
        boolean isSegment = false;
        if (args.containsKey("segment_dir")) {
            isSegment = true;
            addIndexableSegments(toPath(args.get("segment_dir")), segments);
        }
        if (args.containsKey("segment")) {
            Object segmentsFromArg = args.get("segment");
            if (segmentsFromArg instanceof List) {
                // Convert element by element so that lists of Path and of String both work.
                for (Object segment : (List<?>) segmentsFromArg) {
                    segments.add(toPath(segment));
                }
            } else if (segmentsFromArg instanceof Path || segmentsFromArg instanceof String) {
                segments.add(toPath(segmentsFromArg));
            }
        }
        if (!isSegment) {
            // NOTE(review): this fallback also runs when "segment" was given without
            // "segment_dir", and it reads the local file system via java.io.File
            // rather than a Hadoop FileSystem - confirm that both are intended.
            File newest = newestEntry(new File(crawlId + "/segments"));
            segments.add(new Path(newest.getPath()));
        }

        boolean noCommit = args.containsKey("noCommit");
        boolean deleteGone = args.containsKey("deleteGone");
        boolean normalize = args.containsKey("normalize");
        boolean filter = args.containsKey("filter");
        boolean addBinaryContent = args.containsKey("addBinaryContent");
        boolean base64 = addBinaryContent && args.containsKey("base64");
        String params = args.containsKey("params") ? (String) args.get("params") : null;

        setConf(conf);
        index(crawlDb, linkdb, segments, noCommit, deleteGone, params, filter, normalize, addBinaryContent, base64);
        Map<String, Object> results = new HashMap<>();
        results.put("result", 0);
        return results;
    }

    /** Returns {@code value} itself if it is a {@link Path}, otherwise a path built from its string form. */
    private static Path toPath(Object value) {
        return value instanceof Path ? (Path) value : new Path(value.toString());
    }

    /** Appends every subdirectory of {@code segmentsDir} accepted by {@link SegmentChecker#isIndexable}. */
    private void addIndexableSegments(Path segmentsDir, List<Path> segments) throws IOException {
        FileSystem fs = segmentsDir.getFileSystem(getConf());
        FileStatus[] fstats = fs.listStatus(segmentsDir, HadoopFSUtil.getPassDirectoriesFilter(fs));
        for (Path p : HadoopFSUtil.getPaths(fstats)) {
            if (SegmentChecker.isIndexable(p, fs)) {
                segments.add(p);
            }
        }
    }

    /** Returns the most recently modified entry of {@code dir}, failing clearly if there is none. */
    private static File newestEntry(File dir) throws IOException {
        File[] entries = dir.listFiles();
        if (entries == null || entries.length == 0) {
            // listFiles() yields null when dir is not a readable directory.
            throw new IOException("No segments found in " + dir.getPath());
        }
        File newest = entries[0];
        for (File entry : entries) {
            if (entry.lastModified() > newest.lastModified()) {
                newest = entry;
            }
        }
        return newest;
    }
}

