merge_md.py
· 2.5 KiB · Python
Raw
import zipfile
import os
import re
def extract_and_merge_markdown_files(zip_path, output_file="merged.md"):
    """
    Extract the markdown files from a zip archive and merge them into one file.

    Every file whose name ends in ``.md`` (at any directory depth inside the
    archive) is included. Files named like ``1.md``, ``2.md``, ... are ordered
    by their number; all other markdown files come after them, ordered by
    their extracted path. Each file's content is preceded by an HTML comment
    naming the source file and followed by a blank line.

    The archive is unpacked into a private temporary directory that is
    removed again when the function finishes, even if an error occurs.

    Args:
        zip_path (str): Path to the zip file
        output_file (str): Name of the output markdown file

    Raises:
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
        OSError: If the archive cannot be opened or the output cannot be
            written.
        UnicodeDecodeError: If a markdown file is not valid UTF-8.
    """
    # Local import keeps this function self-contained.
    import tempfile

    def sort_key(file_path):
        # Numbered files sort by their number; everything else goes to the
        # end. The path is a tie-breaker so the order never depends on the
        # directory listing order returned by the OS.
        match = re.match(r'(\d+)\.md', os.path.basename(file_path))
        number = int(match.group(1)) if match else float('inf')
        return (number, file_path)

    # A unique temporary directory (rather than a fixed name in the current
    # working directory) guarantees that stale files from an earlier run are
    # never merged and that no pre-existing user directory gets deleted.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract all files from the zip archive
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Find all markdown files in the extracted tree, already sorted
        md_files = sorted(
            (
                os.path.join(root, name)
                for root, _, names in os.walk(temp_dir)
                for name in names
                if name.endswith('.md')
            ),
            key=sort_key,
        )

        # Merge the sorted files into a single markdown file
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for md_file in md_files:
                file_name = os.path.basename(md_file)
                outfile.write(f"<!-- From file: {file_name} -->\n\n")

                with open(md_file, 'r', encoding='utf-8') as infile:
                    content = infile.read()
                outfile.write(content)

                # Make sure the content ends with a newline, then add one
                # blank line to separate it from the next file.
                if not content.endswith('\n'):
                    outfile.write('\n')
                outfile.write('\n')

    print(f"Successfully merged {len(md_files)} markdown files into {output_file}")
if __name__ == "__main__":
    import argparse

    # Command-line entry point: one positional archive path plus an optional
    # name for the merged output file.
    cli = argparse.ArgumentParser(description="Merge markdown files from a zip archive")
    cli.add_argument("zip_file", help="Path to the zip file containing markdown files")
    cli.add_argument(
        "--output",
        "-o",
        default="merged.md",
        help="Output file name (default: merged.md)",
    )
    options = cli.parse_args()

    extract_and_merge_markdown_files(
        zip_path=options.zip_file,
        output_file=options.output,
    )
| 1 | import zipfile |
| 2 | import os |
| 3 | import re |
| 4 | |
| 5 | def extract_and_merge_markdown_files(zip_path, output_file="merged.md"): |
| 6 | """ |
| 7 | Extracts markdown files from a zip archive and merges them into a single markdown file. |
| 8 | Files are expected to be named like 1.md, 2.md, etc. |
| 9 | |
| 10 | Args: |
| 11 | zip_path (str): Path to the zip file |
| 12 | output_file (str): Name of the output markdown file |
| 13 | """ |
| 14 | # Create a temporary directory to extract files |
| 15 | temp_dir = "temp_extracted" |
| 16 | os.makedirs(temp_dir, exist_ok=True) |
| 17 | |
| 18 | # Extract all files from the zip archive |
| 19 | with zipfile.ZipFile(zip_path, 'r') as zip_ref: |
| 20 | zip_ref.extractall(temp_dir) |
| 21 | |
| 22 | # Find all markdown files in the extracted directory |
| 23 | md_files = [] |
| 24 | for root, _, files in os.walk(temp_dir): |
| 25 | for file in files: |
| 26 | if file.endswith('.md'): |
| 27 | md_files.append(os.path.join(root, file)) |
| 28 | |
| 29 | # Sort files based on their numeric prefix |
| 30 | def get_file_number(file_path): |
| 31 | file_name = os.path.basename(file_path) |
| 32 | match = re.match(r'(\d+)\.md', file_name) |
| 33 | if match: |
| 34 | return int(match.group(1)) |
| 35 | return float('inf') # Non-matching files go to the end |
| 36 | |
| 37 | md_files.sort(key=get_file_number) |
| 38 | |
| 39 | # Merge the sorted files into a single markdown file |
| 40 | with open(output_file, 'w', encoding='utf-8') as outfile: |
| 41 | for md_file in md_files: |
| 42 | file_name = os.path.basename(md_file) |
| 43 | outfile.write(f"<!-- From file: {file_name} -->\n\n") |
| 44 | |
| 45 | with open(md_file, 'r', encoding='utf-8') as infile: |
| 46 | content = infile.read() |
| 47 | outfile.write(content) |
| 48 | |
| 49 | # Add a newline between files for better separation |
| 50 | if not content.endswith('\n'): |
| 51 | outfile.write('\n') |
| 52 | outfile.write('\n') |
| 53 | |
| 54 | # Clean up: remove temporary directory |
| 55 | import shutil |
| 56 | shutil.rmtree(temp_dir) |
| 57 | |
| 58 | print(f"Successfully merged {len(md_files)} markdown files into {output_file}") |
| 59 | |
| 60 | if __name__ == "__main__": |
| 61 | import argparse |
| 62 | |
| 63 | parser = argparse.ArgumentParser(description="Merge markdown files from a zip archive") |
| 64 | parser.add_argument("zip_file", help="Path to the zip file containing markdown files") |
| 65 | parser.add_argument("--output", "-o", default="merged.md", help="Output file name (default: merged.md)") |
| 66 | |
| 67 | args = parser.parse_args() |
| 68 | |
| 69 | extract_and_merge_markdown_files(args.zip_file, args.output) |