Revisión de merge_md.py - "Hanmac Gists"

lectom revisó este gist 11 months ago. Ir a la revisión

1 file changed, 69 insertions

merge_md.py (archivo creado)

		@@ -0,0 +1,69 @@
1	+	import zipfile
2	+	import os
3	+	import re
4	+
def extract_and_merge_markdown_files(zip_path, output_file="merged.md"):
    """
    Extract the markdown files from a zip archive and merge them into one file.

    Every ``*.md`` file found in the archive (at any directory depth) is
    appended to ``output_file``, preceded by an HTML comment naming the file
    it came from. Files named exactly ``<number>.md`` (1.md, 2.md, ...) are
    written first, in numeric order; every other markdown file follows.
    Files with the same rank are ordered by their path so the result does not
    depend on directory listing order.

    The archive is unpacked into a private temporary directory that is removed
    when the function finishes, whether it succeeds or raises.

    Args:
        zip_path (str): Path to the zip file
        output_file (str): Name of the output markdown file

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    # Local import, mirroring how this function already pulled in its
    # cleanup helper; keeps the block self-contained.
    import tempfile

    def get_file_number(file_path):
        # fullmatch (not match) so that names such as "1.md.md" are not
        # mistaken for the numbered file "1.md".
        match = re.fullmatch(r'(\d+)\.md', os.path.basename(file_path))
        if match:
            return int(match.group(1))
        return float('inf')  # Non-matching files go to the end

    # A unique temporary directory avoids picking up (and later deleting)
    # an unrelated "temp_extracted" directory in the working directory, and
    # the context manager guarantees cleanup even if merging fails.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract all files from the zip archive
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Find all markdown files in the extracted directory
        md_files = []
        for root, _, names in os.walk(temp_dir):
            for name in names:
                if name.endswith('.md'):
                    md_files.append(os.path.join(root, name))

        # Numeric rank first; the path breaks ties deterministically.
        md_files.sort(key=lambda path: (get_file_number(path), path))

        # Merge the sorted files into a single markdown file
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for md_file in md_files:
                file_name = os.path.basename(md_file)
                outfile.write(f"<!-- From file: {file_name} -->\n\n")

                with open(md_file, 'r', encoding='utf-8') as infile:
                    content = infile.read()
                outfile.write(content)

                # Ensure the content ends with a newline, then add a blank
                # line so consecutive files are visually separated.
                if not content.endswith('\n'):
                    outfile.write('\n')
                outfile.write('\n')

    print(f"Successfully merged {len(md_files)} markdown files into {output_file}")
59	+
if __name__ == "__main__":
    # Command-line entry point: one positional zip path plus an optional
    # output file name, forwarded to the merge function.
    import argparse

    cli = argparse.ArgumentParser(
        description="Merge markdown files from a zip archive"
    )
    cli.add_argument(
        "zip_file",
        help="Path to the zip file containing markdown files",
    )
    cli.add_argument(
        "--output",
        "-o",
        default="merged.md",
        help="Output file name (default: merged.md)",
    )

    options = cli.parse_args()
    extract_and_merge_markdown_files(options.zip_file, options.output)

Siguiente Anterior