#! /usr/bin/env python

# Copyright 2021, 2022 Steinar Knutsen
#
# Licensed under the EUPL, Version 1.2 or - as soon they will be approved by the
# European Commission - subsequent versions of the EUPL (the "Licence"); You may
# not use this work except in compliance with the Licence. You may obtain a copy
# of the Licence at:
#
# https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Licence is distributed on an "AS IS" basis, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# Licence for the specific language governing permissions and limitations under
# the Licence.

"""Heuristically convert plain text on STDIN to limited ITXT on STDOUT.

The input is split into paragraphs (runs of non-blank lines).  Each paragraph
is either passed through unchanged or wrapped in ITXT monospace marker lines,
depending on a set of heuristic rules selected on the command line.
"""

import getopt
import sys

# Usage text printed by -h.  Only options accepted by main() are listed.
HELP_TEXT = """txt2itxt [-E] [-S] [-h] [-m] [-s]

-E only use explicit rules
-S treat paragraphs of short lines as preformatted (default off)
-h print this text and exit
-m treat mailquoting as monospaced (leading "> ") (default on)
-s treat paragraphs where some line has "many" consecutive space as monospaced
   (default on)

Read from STDIN and write to STDOUT, heuristically converting from plain text
to limited ITXT.
"""

# Marker line that opens and closes a monospaced section in the output.
MONO = "-" * 80

# Run of spaces that many_consecutive_spaces() looks for.
# NOTE(review): the help text only says "many"; three is an assumed threshold
# (two would also match the double space some writers put after a full stop).
# Confirm against the intended behaviour.
SPACE_RUN = " " * 3


def series_of_short_lines(raw_lines):
    """Return True if the paragraph looks like a list of short lines.

    A paragraph qualifies when its mean line length is below 55 and its
    longest line is shorter than 66.  Lengths are taken from the strings as
    given, so any line terminator still attached is counted.  An empty
    paragraph never qualifies.
    """
    if not raw_lines:
        # Nothing to measure; avoids max() on an empty sequence and a
        # division by zero.
        return False
    lengths = [len(line) for line in raw_lines]
    mean_length = sum(lengths) / len(lengths)
    return mean_length < 55 and max(lengths) < 66


def many_consecutive_spaces(raw_lines):
    """Return True if any line contains a run of SPACE_RUN spaces."""
    return any(SPACE_RUN in line for line in raw_lines)


def mail_quoting(raw_lines):
    """Return True if every line starts with the mail quote character '>'.

    An empty string does not start with '>' and therefore makes the result
    False.
    """
    return all(line.startswith(">") for line in raw_lines)


def space_leading_any_line(raw_lines):
    """Return True if any line begins with a whitespace character."""
    # line[:1] is "" for an empty string, and "".isspace() is False, so empty
    # strings are simply skipped instead of raising IndexError.
    return any(line[:1].isspace() for line in raw_lines)


def use_proportional(raw_lines, rules):
    """Decide whether a paragraph should stay as ordinary (proportional) text.

    raw_lines -- the lines of one paragraph
    rules     -- collection of the optional rule functions that are enabled
                 (series_of_short_lines, many_consecutive_spaces, mail_quoting)

    Returns False as soon as one check asks for monospace, True otherwise.
    The leading-whitespace check is unconditional: it applies even when
    *rules* is empty.
    """
    if space_leading_any_line(raw_lines):
        return False
    optional_rules = (series_of_short_lines, many_consecutive_spaces, mail_quoting)
    for rule in optional_rules:
        if rule in rules and rule(raw_lines):
            return False
    return True


def convert_paragraph(lines, rules):
    """Return the output lines for one paragraph.

    Proportional paragraphs are returned as the same list that was passed in;
    all others are returned as a new list with a MONO marker line before and
    after the original lines.
    """
    if use_proportional(lines, rules):
        return lines
    marker = MONO + "\n"
    return [marker] + list(lines) + [marker]


def _flush_paragraph(paragraph, dst, rules):
    """Write the converted paragraph to *dst* and empty the buffer."""
    if paragraph:
        dst.writelines(convert_paragraph(paragraph, rules))
        paragraph.clear()


def convert(src, dst, rules):
    """Copy *src* to *dst*, wrapping monospace paragraphs in MONO markers.

    src   -- iterable of text lines (e.g. an open text file)
    dst   -- object with write() and writelines() methods
    rules -- collection of enabled rule functions, see use_proportional()

    Blank (whitespace-only) lines end the current paragraph and are copied
    through unchanged.

    Raises ValueError if an input line, ignoring trailing whitespace, equals
    the MONO marker, since such input cannot be told apart from markers
    written by this function.
    """
    paragraph = []
    for line in src:
        if not line.strip():
            _flush_paragraph(paragraph, dst, rules)
            dst.write(line)
        elif line.rstrip() == MONO:
            raise ValueError("ITXT monospace marker in input, not handled.")
        else:
            paragraph.append(line)
    # The input may end without a trailing blank line.
    _flush_paragraph(paragraph, dst, rules)


def main(argv=None):
    """Parse command line options and convert STDIN to STDOUT.

    argv -- option list to parse; defaults to sys.argv[1:]

    Exits with status 0 after printing the help text for -h, and with status
    2 after reporting an unrecognised option on STDERR.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        options, _arguments = getopt.gnu_getopt(argv, "EShms")
    except getopt.GetoptError as err:
        sys.stderr.write("txt2itxt: {}\n".format(err))
        sys.stderr.write(HELP_TEXT)
        sys.exit(2)

    # Rules that are on by default.
    rules = {many_consecutive_spaces, mail_quoting}

    # -E is applied first so that its position relative to the options that
    # enable individual rules does not matter.
    if any(option == "-E" for option, _value in options):
        rules.clear()

    for option, _value in options:
        if option == "-h":
            print(HELP_TEXT)
            sys.exit(0)
        elif option == "-S":
            rules.add(series_of_short_lines)
        elif option == "-m":
            rules.add(mail_quoting)
        elif option == "-s":
            rules.add(many_consecutive_spaces)

    convert(sys.stdin, sys.stdout, rules)


if __name__ == "__main__":
    main()