Last active
June 2, 2023 08:23
-
-
Save zombie110year/4c2b5c31cbec0b5f18bbcf2e467c1fee to your computer and use it in GitHub Desktop.
这个脚本可以把指定目录下的 doc、xls、ppt 文件转换成 docx、xlsx、pptx格式。用法可阅读 parser 的参数。使用时不会检查文件锁定,但不会修改原文件,且会弹出Office应用窗口,可以手动关闭对应程序。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import logging | |
| from argparse import ArgumentParser | |
| from pathlib import Path | |
| from sys import exit | |
| from win32com.client import Dispatch, CDispatch | |
# Directory conversions list the matched files and wait for a y/n answer while
# this is True; the --yes command-line flag switches it off.
NEED_CONFIRM = True

# Configure the root logger so INFO-level progress messages are emitted.
logging.basicConfig(level="INFO")
def parser():
    """Build the command-line argument parser for the converter.

    Returns:
        An ``ArgumentParser`` with the positional ``path`` argument and the
        ``-r/--recursive``, ``-e/--extension`` and ``--yes`` options.
    """
    cli = ArgumentParser()
    cli.add_argument("path", help="指定文件或文件夹")
    cli.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="指定是否递归处理文件夹,此选项对文件不起作用",
    )
    cli.add_argument(
        "-e",
        "--extension",
        # An ordered sequence instead of a set: argparse lists the choices in
        # iteration order, and a set of strings iterates in a different order
        # from run to run, which made --help and error messages unstable.
        choices=("doc", "xls", "ppt"),
        default="xls",
        help="指定要处理的文件类型,会转换成对应的OpenXML格式。对于文件,会自动根据后缀名判断",
    )
    cli.add_argument(
        "--yes",
        action="store_true",
        help="如果开启此选项,则程序不经确认便会开始执行,仅对处理文件夹有效",
    )
    return cli
def convert(target: str, recursive: bool = False, extension: str = "xls"):
    """Dispatch the conversion according to what *target* points at.

    Args:
        target: Path of a single file or of a directory.
        recursive: For a directory, also search its subdirectories. Not used
            when *target* is a file.
        extension: For a directory, the extension (without dot) of the files
            to convert. Not used when *target* is a file.

    Exits the process with status -1 when *target* is neither a file nor a
    directory.
    """
    path = Path(target)

    if path.is_file():
        convert_file(path)
        return

    if not path.is_dir():
        logging.error("%s 既不是文件也不是文件夹", path)
        exit(-1)

    # Directory: pick the recursive or the flat scanner.
    scan = convert_dir_rec if recursive else convert_dir
    scan(root=path, ext=extension)
def convert_file(target: Path):
    """Convert a single file, choosing the Office application from its suffix.

    Args:
        target: Path of a ``.xls``, ``.doc`` or ``.ppt`` file.

    An unsupported suffix is logged as an error and nothing is converted.
    """
    # Match the suffix case-insensitively so that names such as "REPORT.XLS"
    # are handled the same way as "report.xls".
    suffix = target.suffix.lower()
    if suffix == ".xls":
        with Excel() as app:
            convert_xls(target, app)
    elif suffix == ".doc":
        with Word() as app:
            convert_doc(target, app)
    elif suffix == ".ppt":
        with PowerPoint() as app:
            convert_ppt(target, app)
    else:
        logging.error("不支持的文件格式:%s", target.suffix)
def convert_dir_rec(root: Path, ext: str):
    """Convert all ``.<ext>`` files found under *root*, subdirectories included.

    Args:
        root: Directory where the recursive search starts.
        ext: Extension to look for, without the leading dot.
    """
    pattern = f"**/*.{ext}"
    logging.info("递归筛选 %s 下的所有 .%s 文件", root.as_posix(), ext)
    convert_list(root.glob(pattern), ext)
def convert_dir(root: Path, ext: str):
    """Convert the ``.<ext>`` files located directly inside *root*.

    Args:
        root: Directory to scan; subdirectories are not searched.
        ext: Extension to look for, without the leading dot.
    """
    pattern = f"*.{ext}"
    logging.info("筛选 %s 下的所有 .%s 文件", root.as_posix(), ext)
    convert_list(root.glob(pattern), ext)
def convert_list(candidates: list[Path], ext: str):
    """Convert every file in *candidates* with the Office application for *ext*.

    Args:
        candidates: Paths to convert. Any iterable is accepted (the directory
            helpers pass a glob generator); it is materialised once.
        ext: Source extension without the leading dot: "xls", "doc" or "ppt".

    No application is started when there is nothing to convert, when *ext*
    is not supported, or when the user declines the confirmation prompt.
    """
    files = list(candidates)
    if not files:
        # Nothing matched: do not prompt and do not start an Office process.
        logging.info("没有找到需要处理的 .%s 文件", ext)
        return

    # Validate the extension before asking, so the user is never asked to
    # confirm a run that could not start anyway.
    handlers = {
        "xls": (Excel, convert_xls),
        "doc": (Word, convert_doc),
        "ppt": (PowerPoint, convert_ppt),
    }
    handler = handlers.get(ext)
    if handler is None:
        logging.error("不支持的文件格式:.%s", ext)
        return
    office, method = handler

    if NEED_CONFIRM:
        for index, file in enumerate(files, start=1):
            print(f"{index:02}: {file.as_posix()}")
        answer = input("将会处理如上文件,确认?[y/n] ")
        if not answer.strip().lower().startswith("y"):
            logging.info("用户取消操作")
            return

    # One application instance is shared by the whole batch.
    with office() as app:
        for file in files:
            method(file, app)
def convert_xls(target: Path, app: CDispatch):
    """Save the workbook *target* as ``.xlsx`` next to the original file.

    Args:
        target: Path of the source ``.xls`` workbook; it is not modified.
        app: Excel application object used to open and save the workbook.

    Any error raised while saving propagates to the caller after the
    workbook has been closed.
    """
    xlOpenXMLWorkbook = 51  # Excel file-format code for .xlsx
    abso = target.absolute()
    src = str(abso)
    dest = str(abso.with_suffix(".xlsx"))
    xls = app.Workbooks.Open(src)
    try:
        xls.SaveAs(dest, xlOpenXMLWorkbook)
    finally:
        # Always release the workbook, even when SaveAs fails. Passing
        # SaveChanges=False keeps Excel from showing a save dialog that
        # would block the batch.
        xls.Close(False)
    logging.info("Save as %s", dest)
def convert_doc(target: Path, app: CDispatch):
    """Save the document *target* as ``.docx`` next to the original file.

    Args:
        target: Path of the source ``.doc`` document; it is not modified.
        app: Word application object used to open and save the document.

    Any error raised while saving propagates to the caller after the
    document has been closed.
    """
    wdFormatXMLDocument = 12  # Word file-format code for .docx
    wdDoNotSaveChanges = 0  # WdSaveOptions value: close without saving
    abso = target.absolute()
    src = str(abso)
    dest = str(abso.with_suffix(".docx"))
    doc = app.Documents.Open(src)
    try:
        doc.SaveAs2(dest, wdFormatXMLDocument)
    finally:
        # Always release the document, even when SaveAs2 fails, and never
        # let Word show a save dialog that would block the batch.
        doc.Close(wdDoNotSaveChanges)
    logging.info("Save as %s", dest)
def convert_ppt(target: Path, app: CDispatch):
    """Save the presentation *target* as ``.pptx`` next to the original file.

    Args:
        target: Path of the source ``.ppt`` presentation; it is not modified.
        app: PowerPoint application object used to open and save the file.

    Any error raised while saving propagates to the caller after the
    presentation has been closed.
    """
    ppSaveAsOpenXMLPresentation = 24  # PowerPoint file-format code for .pptx
    abso = target.absolute()
    src = str(abso)
    dest = str(abso.with_suffix(".pptx"))
    ppt = app.Presentations.Open(src)
    try:
        ppt.SaveAs(dest, ppSaveAsOpenXMLPresentation)
    finally:
        # Always release the presentation, even when SaveAs fails.
        ppt.Close()
    logging.info("Save as %s", dest)
class OfficeCom:
    """Context manager that starts an Office application via COM and quits it on exit.

    Subclasses provide the COM ProgID in ``_code``. Entering the context
    returns the application object; leaving it calls ``Quit()`` on that object.
    """

    # Application object while inside the ``with`` block, otherwise None.
    _app: CDispatch | None
    # COM ProgID handed to Dispatch, e.g. "Excel.Application".
    _code: str

    def __init__(self) -> None:
        self._app = None

    def __enter__(self) -> CDispatch:
        """Start the application, show its window and return it."""
        app = Dispatch(self._code)
        try:
            app.Visible = True
        except Exception:
            # __exit__ is not called when __enter__ fails, so quit here to
            # avoid leaving the freshly started application behind.
            app.Quit()
            raise
        self._app = app
        return app

    def __exit__(self, a, b, c):
        """Quit the application; exceptions from the ``with`` body propagate."""
        # Drop the reference first so the instance never holds on to an
        # application object that has already been told to quit.
        app, self._app = self._app, None
        if app is not None:
            app.Quit()
class Excel(OfficeCom):
    """Office context manager bound to the "Excel.Application" ProgID."""

    # The ProgID is identical for every instance, so it is a class constant
    # instead of being assigned in a per-instance __init__.
    _code = "Excel.Application"
class Word(OfficeCom):
    """Office context manager bound to the "Word.Application" ProgID."""

    # The ProgID is identical for every instance, so it is a class constant
    # instead of being assigned in a per-instance __init__.
    _code = "Word.Application"
class PowerPoint(OfficeCom):
    """Office context manager bound to the "PowerPoint.Application" ProgID."""

    # The ProgID is identical for every instance, so it is a class constant
    # instead of being assigned in a per-instance __init__.
    _code = "PowerPoint.Application"
if __name__ == "__main__":
    # Script entry point: read the command line, honour --yes, then convert.
    cli_args = parser().parse_args()
    if cli_args.yes:
        # Skip the interactive confirmation for directory runs.
        NEED_CONFIRM = False
    convert(cli_args.path, cli_args.recursive, cli_args.extension)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment