Skip to content

Instantly share code, notes, and snippets.

@zombie110year
Last active June 2, 2023 08:23
Show Gist options
  • Select an option

  • Save zombie110year/4c2b5c31cbec0b5f18bbcf2e467c1fee to your computer and use it in GitHub Desktop.

Select an option

Save zombie110year/4c2b5c31cbec0b5f18bbcf2e467c1fee to your computer and use it in GitHub Desktop.
这个脚本可以把指定目录下的 doc、xls、ppt 文件转换成 docx、xlsx、pptx格式。用法可阅读 parser 的参数。使用时不会检查文件锁定,但不会修改原文件,且会弹出Office应用窗口,可以手动关闭对应程序。
import logging
from argparse import ArgumentParser
from pathlib import Path
from sys import exit
from win32com.client import Dispatch, CDispatch
# Configure the root logger so INFO-level messages and above are emitted.
logging.basicConfig(level="INFO")
# When True, convert_list prints the candidate files and waits for a y/n answer
# before converting anything; the --yes command-line flag sets this to False.
NEED_CONFIRM = True
def parser():
    """Build the command-line argument parser for the converter.

    Returns:
        An ``ArgumentParser`` that accepts the positional ``path`` plus the
        optional ``-r/--recursive``, ``-e/--extension`` and ``--yes`` flags.
    """
    p = ArgumentParser()
    p.add_argument("path", help="指定文件或文件夹")
    p.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="指定是否递归处理文件夹,此选项对文件不起作用",
    )
    p.add_argument(
        "-e",
        "--extension",
        # An ordered tuple (rather than a set) keeps the order of the choices
        # shown in --help and in error messages identical on every run.
        choices=("doc", "xls", "ppt"),
        default="xls",
        help="指定要处理的文件类型,会转换成对应的OpenXML格式。对于文件,会自动根据后缀名判断",
    )
    p.add_argument(
        "--yes",
        action="store_true",
        help="如果开启此选项,则程序不经确认便会开始执行,仅对处理文件夹有效",
    )
    return p
def convert(target: str, recursive: bool = False, extension: str = "xls"):
    """Route *target* to the matching conversion routine.

    A regular file goes to ``convert_file``. A directory goes to
    ``convert_dir_rec`` when *recursive* is true and to ``convert_dir``
    otherwise, with *extension* selecting which files are picked up.
    Anything else is logged as an error and the process exits with -1.
    """
    path = Path(target)

    if path.is_file():
        convert_file(path)
        return

    if path.is_dir():
        walker = convert_dir_rec if recursive else convert_dir
        walker(root=path, ext=extension)
        return

    # Neither a file nor a directory: report it and stop the program.
    logging.error("%s 既不是文件也不是文件夹", path)
    exit(-1)
def convert_file(target: Path):
    """Convert one legacy Office file, picking the application from its suffix.

    The suffix is compared case-insensitively, so ``REPORT.XLS`` is treated
    like ``report.xls``. A file with any other suffix is not converted; an
    error is logged instead.

    Args:
        target: Path of the ``.xls``, ``.doc`` or ``.ppt`` file to convert.
    """
    # File names on Windows frequently carry upper-case extensions.
    suffix = target.suffix.lower()
    if suffix == ".xls":
        with Excel() as app:
            convert_xls(target, app)
    elif suffix == ".doc":
        with Word() as app:
            convert_doc(target, app)
    elif suffix == ".ppt":
        with PowerPoint() as app:
            convert_ppt(target, app)
    else:
        logging.error("不支持的文件格式:%s", target.suffix)
def convert_dir_rec(root: Path, ext: str):
    """Convert every ``.ext`` file under *root*, including subdirectories.

    *root* is the starting directory of the recursive search; *ext* is the
    extension without its leading dot.
    """
    logging.info("递归筛选 %s 下的所有 .%s 文件", root.as_posix(), ext)
    pattern = f"**/*.{ext}"
    convert_list(root.glob(pattern), ext)
def convert_dir(root: Path, ext: str):
    """Convert the ``.ext`` files located directly inside *root*.

    Subdirectories are not searched; *ext* is the extension without its
    leading dot.
    """
    logging.info("筛选 %s 下的所有 .%s 文件", root.as_posix(), ext)
    matches = root.glob("*." + ext)
    convert_list(matches, ext)
def convert_list(candidates: list[Path], ext: str):
    """Convert a batch of same-type files using a single Office application.

    Args:
        candidates: Files to convert. Any iterable of paths is accepted; it is
            materialised into a list before use.
        ext: Legacy extension without the dot: ``"xls"``, ``"doc"`` or
            ``"ppt"``.

    When the module-level ``NEED_CONFIRM`` flag is true, the files are listed
    and the user must give an answer starting with "y" before anything is
    converted.
    """
    files = list(candidates)
    if not files:
        # Nothing matched: skip the prompt and do not start an Office
        # application just to close it again.
        logging.info("没有找到需要处理的 .%s 文件", ext)
        return

    # One table keeps each application wrapper paired with its converter.
    handlers = {
        "xls": (Excel, convert_xls),
        "doc": (Word, convert_doc),
        "ppt": (PowerPoint, convert_ppt),
    }
    handler = handlers.get(ext)
    if handler is None:
        # Checked before prompting so the user is never asked to confirm a
        # batch that cannot run.
        logging.error("不支持的文件格式:.%s", ext)
        return

    if NEED_CONFIRM:
        for index, file in enumerate(files, start=1):
            print(f"{index:02}: {file.as_posix()}")
        answer = input("将会处理如上文件,确认?[y/n] ")
        if not answer.strip().lower().startswith("y"):
            logging.info("用户取消操作")
            return

    office, method = handler
    with office() as app:
        for file in files:
            method(file, app)
def convert_xls(target: Path, app: CDispatch):
    """Save the workbook *target* as ``.xlsx`` next to the original file.

    Args:
        target: Path of the ``.xls`` file; it is made absolute before being
            handed to the application.
        app: The application object whose ``Workbooks.Open`` is used.

    The workbook is closed even when saving fails, so a failed conversion
    does not leave the document open in the application.
    """
    xlOpenXMLWorkbook = 51  # file-format code passed to SaveAs
    abso = target.absolute()
    src = str(abso)
    dest = str(abso.with_suffix(".xlsx"))
    xls = app.Workbooks.Open(src)
    try:
        xls.SaveAs(dest, xlOpenXMLWorkbook)
    finally:
        xls.Close()
    logging.info("Save as %s", dest)
def convert_doc(target: Path, app: CDispatch):
    """Save the document *target* as ``.docx`` next to the original file.

    Args:
        target: Path of the ``.doc`` file; it is made absolute before being
            handed to the application.
        app: The application object whose ``Documents.Open`` is used.

    The document is closed even when saving fails, so a failed conversion
    does not leave it open in the application.
    """
    wdFormatXMLDocument = 12  # file-format code passed to SaveAs2
    abso = target.absolute()
    src = str(abso)
    dest = str(abso.with_suffix(".docx"))
    doc = app.Documents.Open(src)
    try:
        doc.SaveAs2(dest, wdFormatXMLDocument)
    finally:
        doc.Close()
    logging.info("Save as %s", dest)
def convert_ppt(target: Path, app: CDispatch):
    """Save the presentation *target* as ``.pptx`` next to the original file.

    Args:
        target: Path of the ``.ppt`` file; it is made absolute before being
            handed to the application.
        app: The application object whose ``Presentations.Open`` is used.

    The presentation is closed even when saving fails, so a failed conversion
    does not leave it open in the application.
    """
    ppSaveAsOpenXMLPresentation = 24  # file-format code passed to SaveAs
    abso = target.absolute()
    src = str(abso)
    dest = str(abso.with_suffix(".pptx"))
    ppt = app.Presentations.Open(src)
    try:
        ppt.SaveAs(dest, ppSaveAsOpenXMLPresentation)
    finally:
        ppt.Close()
    logging.info("Save as %s", dest)
class OfficeCom:
    """Context manager that starts an Office application and quits it on exit.

    Subclasses assign ``_code``, the identifier passed to ``Dispatch``.
    Entering the context returns the application object with its window made
    visible; leaving the context calls ``Quit`` on it.
    """

    # Application object while the context is active, otherwise None.
    _app: CDispatch | None
    # Identifier handed to Dispatch; assigned by each subclass.
    _code: str

    def __init__(self) -> None:
        self._app = None

    def __enter__(self) -> CDispatch:
        self._app = Dispatch(self._code)
        try:
            self._app.Visible = True
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so the freshly
            # started application has to be shut down here to avoid a leak.
            self._quit()
            raise
        return self._app

    def __exit__(self, a, b, c):
        # Returns None, so an exception raised inside the block propagates.
        self._quit()

    def _quit(self) -> None:
        """Quit the application, if one is running, and drop the reference."""
        app, self._app = self._app, None
        if app is not None:
            app.Quit()
class Excel(OfficeCom):
    """``OfficeCom`` context manager bound to ``Excel.Application``."""

    def __init__(self) -> None:
        self._code = "Excel.Application"
        super().__init__()
class Word(OfficeCom):
    """``OfficeCom`` context manager bound to ``Word.Application``."""

    def __init__(self) -> None:
        self._code = "Word.Application"
        super().__init__()
class PowerPoint(OfficeCom):
    """``OfficeCom`` context manager bound to ``PowerPoint.Application``."""

    def __init__(self) -> None:
        self._code = "PowerPoint.Application"
        super().__init__()
if __name__ == "__main__":
    # Script entry point: read the command line, then run the conversion.
    cli_args = parser().parse_args()
    # --yes switches off the interactive confirmation used for batches.
    if cli_args.yes:
        NEED_CONFIRM = False
    convert(
        target=cli_args.path,
        recursive=cli_args.recursive,
        extension=cli_args.extension,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment