Last active 1741075571

merge_md.py Raw
1import zipfile
2import os
3import re
4
def extract_and_merge_markdown_files(zip_path, output_file="merged.md"):
    """
    Extract markdown files from a zip archive and merge them into one file.

    Every ``*.md`` file found in the archive (at any directory depth) is
    appended to ``output_file``, each preceded by an HTML comment naming the
    file it came from. Files named like ``1.md``, ``2.md``, etc. are ordered
    by their numeric value; any other markdown file is placed after them.
    Files with the same sort position are ordered by their extracted path.

    The archive is unpacked into a private temporary directory that is
    removed when the function finishes, whether it succeeds or raises.

    Args:
        zip_path (str): Path to the zip file
        output_file (str): Name of the output markdown file

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
        UnicodeDecodeError: If a markdown file is not valid UTF-8.
    """
    # Imported locally so the block does not depend on the file's import header.
    import tempfile

    def get_file_number(file_path):
        """Return the number in an ``N.md`` file name, or infinity otherwise."""
        file_name = os.path.basename(file_path)
        # fullmatch: only names that are exactly "<digits>.md" count as
        # numbered; something like "1.md.md" must not be treated as file 1.
        match = re.fullmatch(r'(\d+)\.md', file_name)
        if match:
            return int(match.group(1))
        return float('inf')  # Non-matching files go to the end

    # A unique temporary directory avoids clobbering (and later deleting) an
    # unrelated "temp_extracted" directory in the current working directory,
    # and the context manager guarantees cleanup even when an error occurs.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract all files from the zip archive
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Find all markdown files in the extracted directory
        md_files = []
        for root, _, files in os.walk(temp_dir):
            md_files.extend(
                os.path.join(root, name)
                for name in files
                if name.endswith('.md')
            )

        # Sort by numeric name first; the path is a tie-breaker so the result
        # does not depend on the filesystem's directory listing order.
        md_files.sort(key=lambda path: (get_file_number(path), path))

        # Merge the sorted files into a single markdown file
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for md_file in md_files:
                file_name = os.path.basename(md_file)
                outfile.write(f"<!-- From file: {file_name} -->\n\n")

                with open(md_file, 'r', encoding='utf-8') as infile:
                    content = infile.read()
                outfile.write(content)

                # Make sure the content ends with a newline, then add a blank
                # line so consecutive files stay visually separated.
                if not content.endswith('\n'):
                    outfile.write('\n')
                outfile.write('\n')

    print(f"Successfully merged {len(md_files)} markdown files into {output_file}")
59
if __name__ == "__main__":
    # Command-line entry point: read the zip path and the optional output
    # file name from the arguments, then run the merge.
    from argparse import ArgumentParser

    cli = ArgumentParser(description="Merge markdown files from a zip archive")
    cli.add_argument(
        "zip_file",
        help="Path to the zip file containing markdown files",
    )
    cli.add_argument(
        "--output",
        "-o",
        default="merged.md",
        help="Output file name (default: merged.md)",
    )
    options = cli.parse_args()

    extract_and_merge_markdown_files(options.zip_file, options.output)