/*
 * Decompiled with CFR 0.152.
 */
package picard.illumina;

import htsjdk.samtools.SAMRecordQueryNameComparator;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.fastq.BasicFastqWriter;
import htsjdk.samtools.fastq.FastqReader;
import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.fastq.FastqWriter;
import htsjdk.samtools.fastq.FastqWriterFactory;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.StringUtil;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.Option;
import picard.cmdline.Usage;
import picard.fastq.Casava18ReadNameEncoder;
import picard.fastq.IlluminaReadNameEncoder;
import picard.fastq.ReadNameEncoder;
import picard.illumina.IlluminaBasecallsConverter;
import picard.illumina.parser.ClusterData;
import picard.illumina.parser.ReadData;
import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;
import picard.util.IlluminaUtil;
import picard.util.TabbedTextFileWithHeaderParser;

public class IlluminaBasecallsToFastq
extends CommandLineProgram {
    @Usage
    public String USAGE = this.getStandardUsagePreamble() + "Generate fastq file(s) from data in an Illumina basecalls output directory.\n" + "Separate fastq file(s) are created for each template read, and for each barcode read, in the basecalls.\n" + "Template fastqs have extensions like .<number>.fastq, where <number> is the number of the template read,\n" + "starting with 1.  Barcode fastqs have extensions like .barcode_<number>.fastq, where <number> is the number\n" + "of the barcode read, starting with 1.";
    @Option(doc="The basecalls directory. ", shortName="B")
    public File BASECALLS_DIR;
    @Option(doc="The barcodes directory with _barcode.txt files (generated by ExtractIlluminaBarcodes). If not set, use BASECALLS_DIR. ", shortName="BCD", optional=true)
    public File BARCODES_DIR;
    @Option(doc="Lane number. ", shortName="L")
    public Integer LANE;
    @Option(doc="The prefix for output fastqs.  Extensions as described above are appended.  Use this option for a non-barcoded run, or for a barcoded run in which it is not desired to demultiplex reads into separate files by barcode.", shortName="O", mutex={"MULTIPLEX_PARAMS"})
    public File OUTPUT_PREFIX;
    @Option(doc="The barcode of the run.  Prefixed to read names.", optional=false)
    public String RUN_BARCODE;
    @Option(doc="The name of the machine on which the run was sequenced; required if emitting Casava1.8-style read name headers", optional=true)
    public String MACHINE_NAME;
    @Option(doc="The barcode of the flowcell that was sequenced; required if emitting Casava1.8-style read name headers", optional=true)
    public String FLOWCELL_BARCODE;
    @Option(doc="A description of the logical structure of clusters in an Illumina Run, i.e. a description of the structure IlluminaBasecallsToSam assumes the  data to be in. It should consist of integer/character pairs describing the number of cycles and the type of those cycles (B for Barcode, T for Template, and S for skip).  E.g. If the input data consists of 80 base clusters and we provide a read structure of \"36T8B8S28T\" then, before being converted to SAM records those bases will be split into 4 reads where read one consists of 36 cycles of template, read two consists of 8 cycles of barcode, read three will be an 8 base read of skipped cycles and read four is another 28 cycle template read.  The read consisting of skipped cycles would NOT be included in output SAM/BAM file read groups.", shortName="RS")
    public String READ_STRUCTURE;
    @Option(doc="Tab-separated file for creating all output fastqs demultiplexed by barcode for a lane with single IlluminaBasecallsToFastq invocation.  The columns are OUTPUT_PREFIX, and BARCODE_1, BARCODE_2 ... BARCODE_X where X = number of barcodes per cluster (optional).  Row with BARCODE_1 set to 'N' is used to specify an output_prefix for no barcode match.", mutex={"OUTPUT_PREFIX"})
    public File MULTIPLEX_PARAMS;
    @Option(doc="Which adapters to look for in the read.")
    public List<IlluminaUtil.IlluminaAdapterPair> ADAPTERS_TO_CHECK = new ArrayList<IlluminaUtil.IlluminaAdapterPair>(Arrays.asList(IlluminaUtil.IlluminaAdapterPair.INDEXED, IlluminaUtil.IlluminaAdapterPair.DUAL_INDEXED, IlluminaUtil.IlluminaAdapterPair.NEXTERA_V2, IlluminaUtil.IlluminaAdapterPair.FLUIDIGM));
    @Option(doc="The number of threads to run in parallel. If NUM_PROCESSORS = 0, number of cores is automatically set to the number of cores available on the machine. If NUM_PROCESSORS < 0, then the number of cores used will be the number available on the machine less NUM_PROCESSORS.")
    public Integer NUM_PROCESSORS = 0;
    @Option(doc="If set, this is the first tile to be processed (used for debugging).  Note that tiles are not processed in numerical order.", optional=true)
    public Integer FIRST_TILE;
    @Option(doc="If set, process no more than this many tiles (used for debugging).", optional=true)
    public Integer TILE_LIMIT;
    @Option(doc="Apply EAMSS filtering to identify inappropriately quality scored bases towards the ends of reads and convert their quality scores to Q2.")
    public boolean APPLY_EAMSS_FILTER = true;
    @Option(doc="If true, call System.gc() periodically.  This is useful in cases in which the -Xmx value passed is larger than the available memory.")
    public Boolean FORCE_GC = true;
    @Option(doc="Configure SortingCollections to store this many records before spilling to disk. For an indexed run, each SortingCollection gets this value/number of indices.")
    public int MAX_READS_IN_RAM_PER_TILE = 1200000;
    @Option(doc="The minimum quality (after transforming 0s to 1s) expected from reads.  If qualities are lower than this value, an error is thrown.The default of 2 is what the Illumina's spec describes as the minimum, but in practice the value has been observed lower.")
    public int MINIMUM_QUALITY = 2;
    @Option(doc="Whether to include non-PF reads", shortName="NONPF", optional=true)
    public boolean INCLUDE_NON_PF_READS = true;
    @Option(doc="The read name header formatting to emit.  Casava1.8 formatting has additional information beyond Illumina, including: the passing-filter flag value for the read, the flowcell name, and the sequencer name.", optional=false)
    public ReadNameFormat READ_NAME_FORMAT = ReadNameFormat.CASAVA_1_8;
    @Option(shortName="GZIP", doc="Compress output FASTQ files using gzip and append a .gz extension to the file names.")
    public boolean COMPRESS_OUTPUTS = false;
    // Output writers keyed by concatenated barcode string. The null key holds the writer used when
    // OUTPUT_PREFIX is given, or the row whose barcode column contains "N" in MULTIPLEX_PARAMS.
    private final Map<String, FastqRecordsWriter> barcodeFastqWriterMap = new HashMap<String, FastqRecordsWriter>();
    // Parsed form of READ_STRUCTURE; assigned in initialize().
    private ReadStructure readStructure;
    // Constructed in initialize() and run from doWork().
    IlluminaBasecallsConverter<FastqRecordsForCluster> basecallsConverter;
    private static final Log log = Log.getInstance(IlluminaBasecallsToFastq.class);
    // Creates every fastq writer in buildWriter(); its MD5 setting is applied in initialize().
    private final FastqWriterFactory fastqWriterFactory = new FastqWriterFactory();
    // Read-name generator selected in initialize() according to READ_NAME_FORMAT.
    private ReadNameEncoder readNameEncoder;
    /**
     * Orders clusters by the read header of their first template record, delegating the
     * string comparison to {@link SAMRecordQueryNameComparator#compareReadNames}.
     */
    private static final Comparator<FastqRecordsForCluster> queryNameComparator = new Comparator<FastqRecordsForCluster>() {

        @Override
        public int compare(FastqRecordsForCluster left, FastqRecordsForCluster right) {
            final String leftHeader = left.templateRecords[0].getReadHeader();
            final String rightHeader = right.templateRecords[0].getReadHeader();
            return SAMRecordQueryNameComparator.compareReadNames(leftHeader, rightHeader);
        }
    };

    /**
     * Overrides the {@link CommandLineProgram} work hook: builds the writers and the
     * converter via {@link #initialize()}, then runs tile processing.
     *
     * @return always 0 once tile processing has returned
     */
    @Override
    protected int doWork() {
        initialize();
        basecallsConverter.doTileProcessing();
        return 0;
    }

    /**
     * Checks the options that are only mandatory for Casava 1.8 read names.
     *
     * @return one message per missing option, or {@code null} when nothing is wrong
     */
    @Override
    protected String[] customCommandLineValidation() {
        final List<String> problems = new ArrayList<String>();
        if (this.READ_NAME_FORMAT == ReadNameFormat.CASAVA_1_8) {
            if (this.MACHINE_NAME == null) {
                problems.add("MACHINE_NAME is required when using Casava1.8-style read name headers.");
            }
            if (this.FLOWCELL_BARCODE == null) {
                problems.add("FLOWCELL_BARCODE is required when using Casava1.8-style read name headers.");
            }
        }
        return problems.isEmpty() ? null : problems.toArray(new String[problems.size()]);
    }

    /**
     * Prepares everything {@link #doWork()} needs: the read-name encoder, the parsed read
     * structure, the output writers, and the basecalls converter.
     */
    private void initialize() {
        this.fastqWriterFactory.setCreateMd5(this.CREATE_MD5_FILE);

        // Pick the read-name encoder that matches the requested header format.
        switch (this.READ_NAME_FORMAT) {
            case CASAVA_1_8:
                this.readNameEncoder = new Casava18ReadNameEncoder(this.MACHINE_NAME, this.RUN_BARCODE, this.FLOWCELL_BARCODE);
                break;
            case ILLUMINA:
                this.readNameEncoder = new IlluminaReadNameEncoder(this.RUN_BARCODE);
                break;
        }

        final BclQualityEvaluationStrategy qualityStrategy = new BclQualityEvaluationStrategy(this.MINIMUM_QUALITY);
        this.readStructure = new ReadStructure(this.READ_STRUCTURE);

        if (this.MULTIPLEX_PARAMS != null) {
            IOUtil.assertFileIsReadable(this.MULTIPLEX_PARAMS);
        }

        // Without OUTPUT_PREFIX the writers come from MULTIPLEX_PARAMS, one per barcode row.
        final boolean demultiplex = this.OUTPUT_PREFIX == null;
        if (demultiplex) {
            populateWritersFromMultiplexParams();
        } else {
            this.barcodeFastqWriterMap.put(null, buildWriter(this.OUTPUT_PREFIX));
        }

        // The in-RAM budget is expressed in reads, so divide it by the reads stored per cluster.
        final int readsPerCluster = this.readStructure.templates.length() + this.readStructure.barcodes.length();
        final int maxClustersInRam = this.MAX_READS_IN_RAM_PER_TILE / readsPerCluster;
        final FastqRecordsForClusterCodec codec =
                new FastqRecordsForClusterCodec(this.readStructure.templates.length(), this.readStructure.barcodes.length());

        this.basecallsConverter = new IlluminaBasecallsConverter<FastqRecordsForCluster>(
                this.BASECALLS_DIR,
                this.BARCODES_DIR,
                this.LANE,
                this.readStructure,
                this.barcodeFastqWriterMap,
                demultiplex,
                maxClustersInRam,
                this.TMP_DIR,
                this.NUM_PROCESSORS,
                this.FORCE_GC,
                this.FIRST_TILE,
                this.TILE_LIMIT,
                queryNameComparator,
                codec,
                FastqRecordsForCluster.class,
                qualityStrategy,
                this.APPLY_EAMSS_FILTER,
                this.INCLUDE_NON_PF_READS);

        log.info("READ STRUCTURE IS " + this.readStructure.toString());

        final ReadStructure outputReadStructure = this.basecallsConverter.getFactory().getOutputReadStructure();
        this.basecallsConverter.setConverter(new ClusterToFastqRecordsForClusterConverter(outputReadStructure));
    }

    /**
     * Verifies that every expected column label is present in the MULTIPLEX_PARAMS header.
     *
     * @param actualCols   column labels found in the file
     * @param expectedCols column labels that must be present
     * @throws PicardException naming the missing columns when any expected label is absent
     */
    private void assertExpectedColumns(Set<String> actualCols, Set<String> expectedCols) {
        final Set<String> absent = new HashSet<String>(expectedCols);
        absent.removeAll(actualCols);
        if (absent.isEmpty()) {
            return;
        }
        final String message = String.format("MULTIPLEX_PARAMS file %s is missing the following columns: %s.", this.MULTIPLEX_PARAMS.getAbsolutePath(), StringUtil.join(", ", absent));
        throw new PicardException(message);
    }

    /**
     * Reads MULTIPLEX_PARAMS and registers one {@link FastqRecordsWriter} per data row in
     * {@code barcodeFastqWriterMap}.
     *
     * <p>The file must contain an OUTPUT_PREFIX column plus BARCODE_1..BARCODE_n, where n is the
     * number of barcode reads in the read structure. A row is keyed by the concatenation of its
     * barcode values; a row with no barcode columns, or with any barcode value equal to "N", is
     * stored under the {@code null} key.
     *
     * @throws PicardException if a required column is missing, a key occurs twice, or the file
     *                         has no data rows
     */
    private void populateWritersFromMultiplexParams() {
        final TabbedTextFileWithHeaderParser libraryParamsParser = new TabbedTextFileWithHeaderParser(this.MULTIPLEX_PARAMS);
        try {
            final List<String> barcodeColumnLabels = new ArrayList<String>();
            for (int i = 1; i <= this.readStructure.barcodes.length(); ++i) {
                barcodeColumnLabels.add("BARCODE_" + i);
            }

            final Set<String> expectedColumnLabels = new HashSet<String>(barcodeColumnLabels);
            expectedColumnLabels.add("OUTPUT_PREFIX");
            this.assertExpectedColumns(libraryParamsParser.columnLabels(), expectedColumnLabels);

            for (final TabbedTextFileWithHeaderParser.Row row : libraryParamsParser) {
                // key stays null when there are no barcode columns or when any barcode is "N".
                String key = null;
                if (!barcodeColumnLabels.isEmpty()) {
                    final List<String> barcodeValues = new ArrayList<String>(barcodeColumnLabels.size());
                    for (final String barcodeLabel : barcodeColumnLabels) {
                        barcodeValues.add(row.getField(barcodeLabel));
                    }
                    if (!barcodeValues.contains("N")) {
                        key = StringUtil.join("", barcodeValues);
                    }
                }

                if (this.barcodeFastqWriterMap.containsKey(key)) {
                    throw new PicardException("Row for barcode " + key + " appears more than once in MULTIPLEX_PARAMS file " + this.MULTIPLEX_PARAMS);
                }
                final FastqRecordsWriter writer = this.buildWriter(new File(row.getField("OUTPUT_PREFIX")));
                this.barcodeFastqWriterMap.put(key, writer);
            }

            if (this.barcodeFastqWriterMap.isEmpty()) {
                throw new PicardException("MULTIPLEX_PARAMS file " + this.MULTIPLEX_PARAMS + " does not have any data rows.");
            }
        } finally {
            // Release the parser on the error paths too, not only after a successful parse.
            libraryParamsParser.close();
        }
    }

    /**
     * Creates the fastq writers for one output prefix: one file per template read named
     * {@code <prefix>.<n>.<ext>} and one per barcode read named {@code <prefix>.barcode_<n>.<ext>},
     * with n starting at 1 and ext being "fastq" or "fastq.gz" depending on COMPRESS_OUTPUTS.
     *
     * @param outputPrefix path whose parent directory receives the files and whose name is the prefix
     * @return a writer bundling all template and barcode writers for this prefix
     */
    private FastqRecordsWriter buildWriter(File outputPrefix) {
        final File parentDir = outputPrefix.getAbsoluteFile().getParentFile();
        IOUtil.assertDirectoryIsWritable(parentDir);

        final String baseName = outputPrefix.getName();
        final String extension;
        if (this.COMPRESS_OUTPUTS) {
            extension = "fastq.gz";
        } else {
            extension = "fastq";
        }

        final FastqWriter[] templateWriters = new FastqWriter[this.readStructure.templates.length()];
        final FastqWriter[] barcodeWriters = new FastqWriter[this.readStructure.barcodes.length()];

        for (int t = 0; t < templateWriters.length; ++t) {
            final File target = new File(parentDir, String.format("%s.%d.%s", baseName, t + 1, extension));
            templateWriters[t] = this.fastqWriterFactory.newWriter(target);
        }
        for (int b = 0; b < barcodeWriters.length; ++b) {
            final File target = new File(parentDir, String.format("%s.barcode_%d.%s", baseName, b + 1, extension));
            barcodeWriters[b] = this.fastqWriterFactory.newWriter(target);
        }
        return new FastqRecordsWriter(templateWriters, barcodeWriters);
    }

    /**
     * Command-line entry point; hands the arguments to a new program instance via
     * {@code instanceMainWithExit}.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        final IlluminaBasecallsToFastq program = new IlluminaBasecallsToFastq();
        program.instanceMainWithExit(args);
    }

    static class FastqRecordsForClusterCodec
    implements SortingCollection.Codec<FastqRecordsForCluster> {
        private final int numTemplates;
        private final int numBarcodes;
        private BasicFastqWriter writer = null;
        private FastqReader reader = null;

        FastqRecordsForClusterCodec(int numTemplates, int numBarcodes) {
            this.numTemplates = numTemplates;
            this.numBarcodes = numBarcodes;
        }

        public void setOutputStream(OutputStream os) {
            this.writer = new BasicFastqWriter(new PrintStream(os));
        }

        public void setInputStream(InputStream is) {
            this.reader = new FastqReader(new BufferedReader(new InputStreamReader(is)));
        }

        public void encode(FastqRecordsForCluster val) {
            if (this.numTemplates != val.templateRecords.length) {
                throw new IllegalStateException();
            }
            if (this.numBarcodes != val.barcodeRecords.length) {
                throw new IllegalStateException();
            }
            this.encodeArray(val.templateRecords);
            this.encodeArray(val.barcodeRecords);
            this.writer.flush();
        }

        private void encodeArray(FastqRecord[] recs) {
            for (FastqRecord rec : recs) {
                this.writer.write(rec);
            }
        }

        public FastqRecordsForCluster decode() {
            if (!this.reader.hasNext()) {
                return null;
            }
            FastqRecordsForCluster ret = new FastqRecordsForCluster(this.numTemplates, this.numBarcodes);
            this.decodeArray(ret.templateRecords);
            this.decodeArray(ret.barcodeRecords);
            return ret;
        }

        private void decodeArray(FastqRecord[] recs) {
            for (int i = 0; i < recs.length; ++i) {
                recs[i] = this.reader.next();
            }
        }

        public SortingCollection.Codec<FastqRecordsForCluster> clone() {
            return new FastqRecordsForClusterCodec(this.numTemplates, this.numBarcodes);
        }
    }

    /**
     * Converts a {@link ClusterData} into the fastq records written for that cluster, using
     * the enclosing program's read structure and read-name encoder.
     */
    class ClusterToFastqRecordsForClusterConverter
    implements IlluminaBasecallsConverter.ClusterDataConverter<FastqRecordsForCluster> {
        private final int[] templateIndices;
        private final int[] barcodeIndices;

        /**
         * @param outputReadStructure read structure whose template and barcode indices select
         *                            the reads pulled from each cluster
         */
        ClusterToFastqRecordsForClusterConverter(ReadStructure outputReadStructure) {
            this.templateIndices = outputReadStructure.templates.getIndices();
            this.barcodeIndices = outputReadStructure.barcodes.getIndices();
        }

        /**
         * Builds the template and barcode fastq records for one cluster. Template read names get
         * a read-number suffix only when there is more than one template read; barcode read
         * names never do.
         */
        @Override
        public FastqRecordsForCluster convertClusterToOutputRecord(ClusterData cluster) {
            final ReadStructure structure = IlluminaBasecallsToFastq.this.readStructure;
            FastqRecordsForCluster ret = new FastqRecordsForCluster(structure.templates.length(), structure.barcodes.length());
            boolean appendReadNumberSuffix = ret.templateRecords.length > 1;
            this.makeFastqRecords(ret.templateRecords, this.templateIndices, cluster, appendReadNumberSuffix);
            this.makeFastqRecords(ret.barcodeRecords, this.barcodeIndices, cluster, false);
            return ret;
        }

        /**
         * Fills {@code recs[i]} from the cluster read at {@code indices[i]}. Bases written as
         * '.' are replaced with 'N' and qualities are converted with {@link SAMUtils#phredToFastq}.
         */
        private void makeFastqRecords(FastqRecord[] recs, int[] indices, ClusterData cluster, boolean appendReadNumberSuffix) {
            // Plain int counter: the previous (short) narrowing of the index wrapped negative past 32767.
            for (int i = 0; i < indices.length; ++i) {
                ReadData readData = cluster.getRead(indices[i]);
                String readBases = StringUtil.bytesToString(readData.getBases()).replace('.', 'N');
                String readName = IlluminaBasecallsToFastq.this.readNameEncoder.generateReadName(cluster, appendReadNumberSuffix ? Integer.valueOf(i + 1) : null);
                recs[i] = new FastqRecord(readName, readBases, null, SAMUtils.phredToFastq(readData.getQualities()));
            }
        }
    }

    /**
     * Holder for the fastq records of a single cluster: one slot per template read and one
     * per barcode read. The arrays are allocated empty; callers fill the slots.
     */
    static class FastqRecordsForCluster {
        final FastqRecord[] templateRecords;
        final FastqRecord[] barcodeRecords;

        /**
         * @param numTemplates length of {@code templateRecords}
         * @param numBarcodes  length of {@code barcodeRecords}
         */
        FastqRecordsForCluster(int numTemplates, int numBarcodes) {
            templateRecords = new FastqRecord[numTemplates];
            barcodeRecords = new FastqRecord[numBarcodes];
        }
    }

    /**
     * Writes a cluster's records to their destination files: the i-th template record goes to
     * the i-th template writer, and likewise for barcode records.
     */
    private static class FastqRecordsWriter
    implements IlluminaBasecallsConverter.ConvertedClusterDataWriter<FastqRecordsForCluster> {
        final FastqWriter[] templateWriters;
        final FastqWriter[] barcodeWriters;

        /**
         * @param templateWriters one writer per template read, in read order
         * @param barcodeWriters  one writer per barcode read, in read order
         */
        private FastqRecordsWriter(FastqWriter[] templateWriters, FastqWriter[] barcodeWriters) {
            this.templateWriters = templateWriters;
            this.barcodeWriters = barcodeWriters;
        }

        /** Writes the template records first, then the barcode records. */
        @Override
        public void write(FastqRecordsForCluster records) {
            writeEach(templateWriters, records.templateRecords);
            writeEach(barcodeWriters, records.barcodeRecords);
        }

        /** Sends the record at each position to the writer at the same position. */
        private static void writeEach(FastqWriter[] sinks, FastqRecord[] recs) {
            int position = 0;
            for (FastqWriter sink : sinks) {
                sink.write(recs[position]);
                ++position;
            }
        }

        /** Closes all template writers, then all barcode writers. */
        @Override
        public void close() {
            closeEach(templateWriters);
            closeEach(barcodeWriters);
        }

        /** Closes every writer of the array, in order. */
        private static void closeEach(FastqWriter[] sinks) {
            for (int i = 0; i < sinks.length; ++i) {
                sinks[i].close();
            }
        }
    }

    /**
     * Read-name header styles selectable through READ_NAME_FORMAT.
     */
    public static enum ReadNameFormat {
        // Read names come from Casava18ReadNameEncoder; MACHINE_NAME and FLOWCELL_BARCODE are then required.
        CASAVA_1_8,
        // Read names come from IlluminaReadNameEncoder, built from RUN_BARCODE only.
        ILLUMINA;

    }
}

