lectom revisó este gist 9 months ago. Ir a la revisión
1 file changed, 69 insertions
merge_md.py(archivo creado)
| @@ -0,0 +1,69 @@ | |||
| 1 | + | import zipfile | |
| 2 | + | import os | |
| 3 | + | import re | |
| 4 | + | ||
def extract_and_merge_markdown_files(zip_path, output_file="merged.md"):
    """
    Extract the markdown files from a zip archive and merge them into one file.

    Every entry whose name ends in ``.md`` (at any directory depth inside the
    archive) is appended to ``output_file``, preceded by an HTML comment that
    names the source file. Files named ``<number>.md`` (1.md, 2.md, ...) come
    first, in numeric order; every other markdown file follows, ordered by its
    path inside the archive so the result is the same on every run.

    Args:
        zip_path (str): Path to the zip file
        output_file (str): Name of the output markdown file

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
        UnicodeDecodeError: If a markdown file is not valid UTF-8.
    """
    # Imported locally so this function stays self-contained, the same way the
    # original brought in its cleanup helper.
    import tempfile

    def sort_key(relative_path):
        # fullmatch (rather than match) so that only names that are exactly
        # "<digits>.md" count as numbered; something like "1.mdx.md" does not.
        match = re.fullmatch(r'(\d+)\.md', os.path.basename(relative_path))
        number = int(match.group(1)) if match else float('inf')
        # The path is a tie-breaker: without it, all non-numbered files share
        # the key inf and end up in whatever order the filesystem lists them.
        return (number, relative_path)

    # A uniquely named scratch directory that is removed even when an error
    # occurs. A fixed directory name in the working directory could pick up
    # stale files from an earlier run, and removing it afterwards could delete
    # a directory the user already had.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract all files from the zip archive
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Collect markdown files as paths relative to the extraction root
        md_files = []
        for root, _, files in os.walk(temp_dir):
            for file in files:
                if file.endswith('.md'):
                    full_path = os.path.join(root, file)
                    md_files.append(os.path.relpath(full_path, temp_dir))

        md_files.sort(key=sort_key)

        # Merge the sorted files into a single markdown file
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for md_file in md_files:
                file_name = os.path.basename(md_file)
                outfile.write(f"<!-- From file: {file_name} -->\n\n")

                source_path = os.path.join(temp_dir, md_file)
                with open(source_path, 'r', encoding='utf-8') as infile:
                    content = infile.read()
                outfile.write(content)

                # Make sure each file ends with a newline, then add a blank
                # line so consecutive files are clearly separated.
                if not content.endswith('\n'):
                    outfile.write('\n')
                outfile.write('\n')

    print(f"Successfully merged {len(md_files)} markdown files into {output_file}")
| 59 | + | ||
if __name__ == "__main__":
    import argparse

    # Command-line entry point: one positional zip path plus an optional
    # output file name, both passed straight to the merge function.
    cli = argparse.ArgumentParser(
        description="Merge markdown files from a zip archive",
    )
    cli.add_argument(
        "zip_file",
        help="Path to the zip file containing markdown files",
    )
    cli.add_argument(
        "--output",
        "-o",
        default="merged.md",
        help="Output file name (default: merged.md)",
    )
    options = cli.parse_args()

    extract_and_merge_markdown_files(options.zip_file, options.output)
Siguiente
Anterior