Skip to content

Instantly share code, notes, and snippets.

@nilsso
Created July 15, 2022 21:15
Show Gist options
  • Select an option

  • Save nilsso/eddbf4cdb134740aa15ebbdccc4f7790 to your computer and use it in GitHub Desktop.

Select an option

Save nilsso/eddbf4cdb134740aa15ebbdccc4f7790 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""Text file cleaning utility (primarily for secured roll preparation)."""
from pathlib import Path
import typing
def clean_file(
    in_path: Path,
    out_path: Path,
    in_encoding: str,
    out_encoding: str,
):
    """Copy a text file to a new location with every backslash removed.

    Args:
        in_path: File to read.
        out_path: File to create (or overwrite) with the cleaned text.
        in_encoding: Codec used to decode ``in_path``.
        out_encoding: Codec used to encode ``out_path``.
    """
    # Both files are opened before anything is read, so the output file is
    # created/truncated even if decoding the input subsequently fails.
    with open(in_path, "r", encoding=in_encoding) as source, open(
        out_path, "w", encoding=out_encoding
    ) as sink:
        raw_text = source.read()
        # Splitting on the backslash and re-joining drops every occurrence.
        cleaned_text = "".join(raw_text.split("\\"))
        sink.write(cleaned_text)
def clean_file_name(p: Path) -> Path:
    """Derive the default output file name for a cleaned copy of *p*.

    Only the final path component of *p* is used. It is lower-cased, spaces
    become underscores, and ``_cleaned`` is inserted before the first dot
    that separates the stem from the extension(s).

    Args:
        p: Path of the input file.

    Returns:
        A relative, single-component path such as ``my_file_cleaned.txt``.
        Names without an extension simply get ``_cleaned`` appended.

    >>> clean_file_name(Path("My File.TXT")).name
    'my_file_cleaned.txt'
    >>> clean_file_name(Path("readme")).name
    'readme_cleaned'
    """
    name = p.name.lower().replace(" ", "_")
    # Start searching at index 1 so a leading dot (hidden files such as
    # ".bashrc") is treated as part of the stem rather than as a separator.
    split_at = name.find(".", 1)
    if split_at == -1:
        # No extension: without this guard the -1 would be used as a slice
        # index, dropping the last character and duplicating the name.
        return Path(f"{name}_cleaned")
    stem = name[:split_at]
    extension = name[split_at + 1:]
    return Path(f"{stem}_cleaned.{extension}")
# Encodings the command-line interface accepts for reading and writing.
Encoding = typing.Literal["latin-1", "utf8"]

# Defaults used when no encoding flags are given on the command line.
# NOTE(review): latin-1 is presumably the input default because it can decode
# any byte sequence without raising -- confirm this is the intent.
DEFAULT_IN_ENCODING: Encoding = "latin-1"
DEFAULT_OUT_ENCODING: Encoding = "utf8"

# Help text passed to the argument parser as the program description.
DESCRIPTION = """\
Take a (hopefully) ASCII text file and re-write it as a cleaned UTF-8 text file.
Removes all back-slashes.
"""
if __name__ == "__main__":
    import argparse

    # Command-line interface: one positional input path, an optional output
    # path, and an encoding choice for each side of the conversion.
    cli = argparse.ArgumentParser("clean_ascii", description=DESCRIPTION)
    cli.add_argument("in_path", type=Path)
    cli.add_argument("-o", "--out_path", type=Path)

    supported_encodings = typing.get_args(Encoding)
    encoding_flags = (
        ("-e", "--in_encoding", DEFAULT_IN_ENCODING),
        ("-E", "--out_encoding", DEFAULT_OUT_ENCODING),
    )
    for short_flag, long_flag, fallback in encoding_flags:
        cli.add_argument(
            short_flag,
            long_flag,
            type=str,
            choices=supported_encodings,
            default=fallback,
        )

    options = cli.parse_args()

    # Without an explicit output path, write next to the input file using
    # the derived "<name>_cleaned.<ext>" file name.
    destination = options.out_path
    if destination is None:
        destination = options.in_path.parent / clean_file_name(options.in_path)

    clean_file(
        options.in_path,
        destination,
        options.in_encoding,
        options.out_encoding,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment