|
@@ -0,0 +1,256 @@
|
|
|
|
+import os
|
|
|
|
+import subprocess
|
|
|
|
+from pathlib import Path
|
|
|
|
+import tempfile
|
|
|
|
+import shutil
|
|
|
|
+
|
|
|
|
# Location of the pandoc executable.  Resolution order:
#   1. the PANDOC_PATH environment variable, when set and non-empty;
#   2. the original hard-coded Windows install location, when it exists;
#   3. a `pandoc` executable found on the system PATH;
#   4. the hard-coded location again, so later error messages still name
#      a concrete path to check.
_DEFAULT_PANDOC_PATH = r"D:\work\pandoc-3.6.4-windows-x86_64\pandoc-3.6.4\pandoc.exe"
PANDOC_PATH = (
    os.environ.get("PANDOC_PATH")
    or (_DEFAULT_PANDOC_PATH if os.path.exists(_DEFAULT_PANDOC_PATH) else None)
    or shutil.which("pandoc")
    or _DEFAULT_PANDOC_PATH
)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def check_dependencies():
    """Verify that the external tools needed for PDF conversion are usable.

    Checks, in order:
      1. that the file named by ``PANDOC_PATH`` exists;
      2. that ``pandoc --version`` can be launched and exits with status 0;
      3. that ``xelatex`` is on PATH (required);
      4. whether ``pdflatex`` is on PATH (only a warning when missing).

    Progress is reported with ``print``.

    Raises:
        FileNotFoundError: pandoc is not at ``PANDOC_PATH`` or xelatex is
            not on PATH.
        RuntimeError: ``pandoc --version`` exited with a non-zero status.
        OSError: the pandoc executable could not be launched.
    """
    # pandoc itself
    if not os.path.exists(PANDOC_PATH):
        raise FileNotFoundError(f"未找到 pandoc,请检查路径是否正确: {PANDOC_PATH}")
    print(f"找到 pandoc: {PANDOC_PATH}")

    # Smoke-test the executable.  errors='replace' keeps a stray non-UTF-8
    # byte in the tool's output from aborting the check.
    try:
        result = subprocess.run(
            [PANDOC_PATH, '--version'],
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
        )
    except OSError as e:
        print(f"测试 pandoc 命令时出错: {str(e)}")
        raise

    # An existing file is not necessarily a working pandoc: check the status.
    if result.returncode != 0:
        raise RuntimeError(
            f"测试 pandoc 命令时出错: 退出码 {result.returncode}: {result.stderr.strip()}"
        )
    print(f"Pandoc 版本信息:\n{result.stdout}")

    # xelatex is the PDF engine the conversion uses, so it is mandatory.
    if shutil.which('xelatex') is None:
        raise FileNotFoundError("未找到 xelatex,请先安装 MiKTeX:https://miktex.org/download")
    print("找到 xelatex")

    # pdflatex is optional: report it but do not fail.
    if shutil.which('pdflatex') is None:
        print("警告:未找到 pdflatex")
    else:
        print("找到 pdflatex")
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def create_latex_template():
    """Write a CJK-capable pandoc LaTeX template to a temporary file.

    The template loads ``ctex`` for Chinese text, configures ``listings``
    for code blocks, and defines the helper macros pandoc's LaTeX output
    refers to (``\\tightlist``, ``\\passthrough``, ``\\pandocbounded``).

    Returns:
        str: Path of the newly created ``.tex`` file.  The file is created
        with ``delete=False``, so the caller is responsible for removing it.
    """
    # Notes on the preamble:
    #  * xcolor (not color) is required for the `green!60!black` mix syntax
    #    used in \lstset below.
    #  * graphicx / array / calc are needed by pandoc's output for images
    #    and for tables with explicit column widths.
    #  * \passthrough is what pandoc wraps inline code in under --listings.
    #  * \pandocbounded is emitted around images by recent pandoc releases;
    #    \providecommand makes this a harmless fallback.
    template = r"""\documentclass{article}
\usepackage[UTF8]{ctex}
\usepackage{listings}
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage{enumitem}
\usepackage{array}
\usepackage{calc}
\usepackage{booktabs}
\usepackage{longtable}
\usepackage{hyperref}
\usepackage{amsmath}

% 定义tightlist命令
\newcommand{\tightlist}{%
  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}

% Macros referenced by pandoc's LaTeX writer
\newcommand{\passthrough}[1]{#1}
\providecommand{\pandocbounded}[1]{#1}

\lstset{
    basicstyle=\ttfamily,
    breaklines=true,
    frame=single,
    numbers=left,
    numberstyle=\tiny,
    keywordstyle=\color{blue},
    commentstyle=\color{green!60!black},
    stringstyle=\color{red}
}

\begin{document}
$body$
\end{document}"""

    # delete=False: the file must outlive this function so pandoc can read it.
    with tempfile.NamedTemporaryFile(
        suffix='.tex', delete=False, mode='w', encoding='utf-8'
    ) as f:
        f.write(template)
    return f.name
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def markdown_to_docx(markdown_file: str, output_docx: str = None) -> str:
    """Convert a Markdown file to DOCX by invoking pandoc.

    Args:
        markdown_file: Path to the input Markdown file.
        output_docx: Path of the DOCX file to write.  When ``None``, the
            input path with its extension replaced by ``.docx`` is used.

    Returns:
        str: Absolute path of the generated DOCX file.

    Raises:
        FileNotFoundError: ``markdown_file`` does not exist.
        Exception: pandoc could not be run, exited with a non-zero status,
            or did not produce the output file.  The underlying error is
            attached as ``__cause__``.
    """
    # Work with absolute paths so the pandoc command is unambiguous.
    markdown_file = os.path.abspath(markdown_file)
    print(f"输入文件绝对路径: {markdown_file}")

    if not os.path.exists(markdown_file):
        raise FileNotFoundError(f"Markdown文件不存在: {markdown_file}")
    print(f"输入文件存在: {markdown_file}")

    # Default output: same location and stem as the input.
    if output_docx is None:
        output_docx = os.path.splitext(markdown_file)[0] + '.docx'
    else:
        output_docx = os.path.abspath(output_docx)
    print(f"输出DOCX文件路径: {output_docx}")

    # exist_ok avoids a crash if the directory appears between check and create.
    output_dir = os.path.dirname(output_docx)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)
        print(f"创建输出目录: {output_dir}")

    try:
        cmd = [
            PANDOC_PATH,
            markdown_file,
            '-o',
            output_docx,
            '--standalone',              # produce a complete document
            '--wrap=preserve',           # keep the source line breaks
            '--highlight-style=tango',   # code highlighting theme
            '--metadata', 'lang=zh-CN',  # document language
        ]

        print(f"\n执行转换命令: {' '.join(cmd)}")

        # check=True makes a failing pandoc raise CalledProcessError; without
        # it a stale output file from an earlier run would be reported as a
        # success.  errors='replace' tolerates non-UTF-8 bytes in tool output.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            check=True,
        )
        print(f"命令输出: {result.stdout}")
        if result.stderr:
            print(f"命令警告: {result.stderr}")

        # pandoc reported success; confirm the file is really there.
        if os.path.exists(output_docx):
            print(f"DOCX文件已成功生成: {output_docx}")
            return output_docx
        else:
            raise Exception(f"DOCX文件未生成: {output_docx}")

    except subprocess.CalledProcessError as e:
        print(f"命令执行错误: {e}")
        print(f"错误输出: {e.stderr or e.output or '无输出'}")
        raise Exception(f"转换过程中出错: {str(e)}") from e
    except Exception as e:
        print(f"发生错误: {str(e)}")
        raise Exception(f"发生未知错误: {str(e)}") from e
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def markdown_to_pdf(markdown_file: str, output_pdf: str = None) -> str:
    """Convert a Markdown file to PDF by invoking pandoc with xelatex.

    A temporary LaTeX template (from ``create_latex_template``) is passed to
    pandoc and removed again when the conversion finishes or fails.

    Args:
        markdown_file: Path to the input Markdown file.
        output_pdf: Path of the PDF file to write.  When ``None``, the input
            path with its extension replaced by ``.pdf`` is used.

    Returns:
        str: Absolute path of the generated PDF file.

    Raises:
        FileNotFoundError: ``markdown_file`` does not exist.
        Exception: a dependency check failed (propagated from
            ``check_dependencies``), pandoc could not be run, exited with a
            non-zero status, or did not produce the output file.
    """
    # Fail early when pandoc / xelatex are unavailable.
    check_dependencies()

    # Work with absolute paths so the pandoc command is unambiguous.
    markdown_file = os.path.abspath(markdown_file)
    print(f"输入文件绝对路径: {markdown_file}")

    if not os.path.exists(markdown_file):
        raise FileNotFoundError(f"Markdown文件不存在: {markdown_file}")
    print(f"输入文件存在: {markdown_file}")

    # Default output: same location and stem as the input.
    if output_pdf is None:
        output_pdf = os.path.splitext(markdown_file)[0] + '.pdf'
    else:
        output_pdf = os.path.abspath(output_pdf)
    print(f"输出PDF文件路径: {output_pdf}")

    # exist_ok avoids a crash if the directory appears between check and create.
    output_dir = os.path.dirname(output_pdf)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)
        print(f"创建输出目录: {output_dir}")

    # Bound before the try so the finally clause can always inspect it, even
    # when creating the template itself fails.
    latex_template = None
    try:
        latex_template = create_latex_template()
        print(f"LaTeX模板文件: {latex_template}")

        cmd = [
            PANDOC_PATH,
            markdown_file,
            '-o',
            output_pdf,
            '--pdf-engine=xelatex',
            f'--template={latex_template}',
            '--listings',
            '--highlight-style=tango',
            '--verbose',
        ]

        print(f"\n执行转换命令: {' '.join(cmd)}")

        # check=True makes a failing pandoc raise CalledProcessError; without
        # it a stale PDF from an earlier run would be reported as a success.
        # errors='replace' tolerates non-UTF-8 bytes in the tools' output.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            check=True,
        )
        print(f"命令输出: {result.stdout}")
        if result.stderr:
            print(f"命令警告: {result.stderr}")

        # pandoc reported success; confirm the file is really there.
        if os.path.exists(output_pdf):
            print(f"PDF文件已成功生成: {output_pdf}")
            return output_pdf
        else:
            raise Exception(f"PDF文件未生成: {output_pdf}")

    except subprocess.CalledProcessError as e:
        print(f"命令执行错误: {e}")
        print(f"错误输出: {e.stderr or e.output or '无输出'}")
        raise Exception(f"转换过程中出错: {str(e)}") from e
    except Exception as e:
        print(f"发生错误: {str(e)}")
        raise Exception(f"发生未知错误: {str(e)}") from e
    finally:
        # Best-effort removal of the temporary template; a cleanup failure
        # must not hide the conversion result.
        if latex_template is not None and os.path.exists(latex_template):
            try:
                os.remove(latex_template)
                print(f"已删除临时模板文件: {latex_template}")
            except OSError as e:
                print(f"删除临时文件时出错: {str(e)}")
|
|
|
|
+
|
|
|
|
+
|
|
|
|
if __name__ == "__main__":
    # Usage example: convert test.md (next to this script) to DOCX and PDF.
    try:
        # Directory containing this script; inputs and outputs live here.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        print(f"当前工作目录: {current_dir}")

        input_file = os.path.join(current_dir, "test.md")

        # Markdown -> DOCX
        docx_file = os.path.join(current_dir, "output.docx")
        print("\n开始转换为DOCX...")
        docx_path = markdown_to_docx(input_file, docx_file)
        print("DOCX转换成功!")
        print(f"DOCX文件已保存至: {docx_path}")

        # Markdown -> PDF
        pdf_file = os.path.join(current_dir, "output.pdf")
        print("\n开始转换为PDF...")
        pdf_path = markdown_to_pdf(input_file, pdf_file)
        print("PDF转换成功!")
        print(f"PDF文件已保存至: {pdf_path}")

    except Exception as e:
        print(f"转换失败: {str(e)}")
        # Exit non-zero so shells and CI can detect the failure.
        raise SystemExit(1)
|