merge_md.py
· 2.5 KiB · Python
Raw
import zipfile
import os
import re
def extract_and_merge_markdown_files(zip_path, output_file="merged.md"):
    """
    Extracts markdown files from a zip archive and merges them into a single markdown file.

    Archive members whose names end in ``.md`` are extracted into a private
    temporary directory that is removed again when the function finishes,
    whether it succeeds or raises. Files named like 1.md, 2.md, etc. are
    merged in ascending numeric order; any other markdown file follows them,
    ordered by path so that repeated runs produce the same output. Each
    file's content is preceded by an HTML comment naming the source file and
    followed by a blank line.

    Args:
        zip_path (str): Path to the zip file
        output_file (str): Name of the output markdown file

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    # Local import, in keeping with the function-scope import the original
    # used for its cleanup helper.
    import tempfile

    def get_sort_key(file_path):
        # Numbered files sort first, by their integer value; everything else
        # goes to the end. The path is the final tie-breaker so the order
        # never depends on directory enumeration order.
        file_name = os.path.basename(file_path)
        match = re.match(r'(\d+)\.md', file_name)
        if match:
            return (0, int(match.group(1)), file_path)
        return (1, 0, file_path)

    # A unique temporary directory avoids picking up stale files from an
    # earlier run and never deletes a directory the caller already had.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract only the markdown members; nothing else is ever read.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            md_members = [
                member for member in zip_ref.namelist()
                if member.endswith('.md')
            ]
            zip_ref.extractall(temp_dir, members=md_members)

        # Find all markdown files in the extracted directory
        md_files = []
        for root, _, files in os.walk(temp_dir):
            for file in files:
                if file.endswith('.md'):
                    md_files.append(os.path.join(root, file))
        md_files.sort(key=get_sort_key)

        # Merge the sorted files into a single markdown file
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for md_file in md_files:
                file_name = os.path.basename(md_file)
                outfile.write(f"<!-- From file: {file_name} -->\n\n")

                with open(md_file, 'r', encoding='utf-8') as infile:
                    content = infile.read()
                outfile.write(content)

                # Terminate the last line if needed, then add one blank
                # line so consecutive files stay visually separated.
                if not content.endswith('\n'):
                    outfile.write('\n')
                outfile.write('\n')

    print(f"Successfully merged {len(md_files)} markdown files into {output_file}")
if __name__ == "__main__":
    # Command-line entry point: read the archive path and the optional
    # output name, then hand both to the merge routine.
    import argparse

    cli = argparse.ArgumentParser(
        description="Merge markdown files from a zip archive",
    )
    cli.add_argument(
        "zip_file",
        help="Path to the zip file containing markdown files",
    )
    cli.add_argument(
        "--output",
        "-o",
        default="merged.md",
        help="Output file name (default: merged.md)",
    )
    options = cli.parse_args()

    extract_and_merge_markdown_files(options.zip_file, options.output)
1 | import zipfile |
2 | import os |
3 | import re |
4 | |
def extract_and_merge_markdown_files(zip_path, output_file="merged.md"):
    """
    Extracts markdown files from a zip archive and merges them into a single markdown file.

    Archive members whose names end in ``.md`` are extracted into a private
    temporary directory that is removed again when the function finishes,
    whether it succeeds or raises. Files named like 1.md, 2.md, etc. are
    merged in ascending numeric order; any other markdown file follows them,
    ordered by path so that repeated runs produce the same output. Each
    file's content is preceded by an HTML comment naming the source file and
    followed by a blank line.

    Args:
        zip_path (str): Path to the zip file
        output_file (str): Name of the output markdown file

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    # Local import, in keeping with the function-scope import the original
    # used for its cleanup helper.
    import tempfile

    def get_sort_key(file_path):
        # Numbered files sort first, by their integer value; everything else
        # goes to the end. The path is the final tie-breaker so the order
        # never depends on directory enumeration order.
        file_name = os.path.basename(file_path)
        match = re.match(r'(\d+)\.md', file_name)
        if match:
            return (0, int(match.group(1)), file_path)
        return (1, 0, file_path)

    # A unique temporary directory avoids picking up stale files from an
    # earlier run and never deletes a directory the caller already had.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract only the markdown members; nothing else is ever read.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            md_members = [
                member for member in zip_ref.namelist()
                if member.endswith('.md')
            ]
            zip_ref.extractall(temp_dir, members=md_members)

        # Find all markdown files in the extracted directory
        md_files = []
        for root, _, files in os.walk(temp_dir):
            for file in files:
                if file.endswith('.md'):
                    md_files.append(os.path.join(root, file))
        md_files.sort(key=get_sort_key)

        # Merge the sorted files into a single markdown file
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for md_file in md_files:
                file_name = os.path.basename(md_file)
                outfile.write(f"<!-- From file: {file_name} -->\n\n")

                with open(md_file, 'r', encoding='utf-8') as infile:
                    content = infile.read()
                outfile.write(content)

                # Terminate the last line if needed, then add one blank
                # line so consecutive files stay visually separated.
                if not content.endswith('\n'):
                    outfile.write('\n')
                outfile.write('\n')

    print(f"Successfully merged {len(md_files)} markdown files into {output_file}")
59 | |
if __name__ == "__main__":
    # Command-line entry point: read the archive path and the optional
    # output name, then hand both to the merge routine.
    import argparse

    cli = argparse.ArgumentParser(
        description="Merge markdown files from a zip archive",
    )
    cli.add_argument(
        "zip_file",
        help="Path to the zip file containing markdown files",
    )
    cli.add_argument(
        "--output",
        "-o",
        default="merged.md",
        help="Output file name (default: merged.md)",
    )
    options = cli.parse_args()

    extract_and_merge_markdown_files(options.zip_file, options.output)