import os
import re
import tempfile
import zipfile

# A file name made up solely of digits followed by the ".md" extension.
_NUMBERED_MD = re.compile(r'(\d+)\.md')


def _numeric_sort_key(file_path):
    """Return a sort key ordering files by numeric name, then by path.

    Files whose base name is not ``<digits>.md`` get ``inf`` as their number
    so they sort after every numbered file. The path is used as a tie-breaker
    so the resulting order does not depend on directory listing order.
    """
    match = _NUMBERED_MD.fullmatch(os.path.basename(file_path))
    number = int(match.group(1)) if match else float('inf')
    return (number, file_path)


def _find_markdown_files(directory):
    """Return the paths of all ``.md`` files found anywhere under *directory*."""
    return [
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names
        if name.endswith('.md')
    ]


def extract_and_merge_markdown_files(zip_path: str,
                                     output_file: str = "merged.md") -> None:
    """Merge the markdown files of a zip archive into one markdown file.

    The archive is extracted into a private temporary directory, every
    ``.md`` file found (at any depth) is collected, and the files are
    concatenated into *output_file*. Files named like ``1.md``, ``2.md`` are
    ordered numerically; any other ``.md`` file is appended after them.
    Each file's content is preceded by two newlines and followed by a blank
    line. A summary line is printed once the merge has finished.

    Args:
        zip_path: Path to the zip file.
        output_file: Name of the output markdown file. It is overwritten if
            it already exists.

    Raises:
        FileNotFoundError: If *zip_path* does not exist.
        zipfile.BadZipFile: If *zip_path* is not a valid zip archive.
        UnicodeDecodeError: If a markdown file is not valid UTF-8.
    """
    # A unique temporary directory avoids clobbering (and later deleting) an
    # unrelated "temp_extracted" directory in the current working directory,
    # and the context manager removes it even when an error occurs.
    with tempfile.TemporaryDirectory(prefix="md_merge_") as temp_dir:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        md_files = sorted(_find_markdown_files(temp_dir),
                          key=_numeric_sort_key)

        with open(output_file, 'w', encoding='utf-8') as outfile:
            for md_file in md_files:
                outfile.write("\n\n")

                with open(md_file, 'r', encoding='utf-8') as infile:
                    content = infile.read()
                outfile.write(content)

                # Make sure the content ends with a newline, then add one
                # more so consecutive files are separated by a blank line.
                if not content.endswith('\n'):
                    outfile.write('\n')
                outfile.write('\n')

    print(f"Successfully merged {len(md_files)} markdown files into {output_file}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Merge markdown files from a zip archive")
    parser.add_argument("zip_file",
                        help="Path to the zip file containing markdown files")
    parser.add_argument("--output", "-o", default="merged.md",
                        help="Output file name (default: merged.md)")

    args = parser.parse_args()
    extract_and_merge_markdown_files(args.zip_file, args.output)