"""Convert a Markdown file to a Microsoft Word (.docx) document."""
import markdown
from docx import Document
from bs4 import BeautifulSoup

# HTML heading tag names mapped to the Word heading level they produce.
_HEADING_LEVELS = {f'h{level}': level for level in range(1, 7)}

# Deepest list level that gets its own style ('List Bullet 3' / 'List Number 3');
# deeper nesting reuses the level-3 style.
# NOTE(review): relies on the default python-docx template providing the
# '... 2' and '... 3' list styles - confirm if a custom template is introduced.
_MAX_LIST_DEPTH = 3


def _add_inline_runs(paragraph, node, bold=False, italic=False):
    """Append the inline content of *node* to *paragraph* as formatted runs.

    Children are walked recursively so nested markup such as
    ``<strong><em>text</em></strong>`` keeps both attributes, and tags with
    no special handling (links, inline code, ...) contribute their text
    rather than being passed to ``add_run`` as non-string objects.
    """
    for child in node.children:
        tag_name = getattr(child, 'name', None)
        if tag_name is None:
            # Plain text node.
            run = paragraph.add_run(str(child))
            if bold:
                run.bold = True
            if italic:
                run.italic = True
        elif tag_name == 'br':
            # Hard line break inside the paragraph.
            paragraph.add_run().add_break()
        else:
            _add_inline_runs(
                paragraph,
                child,
                bold=bold or tag_name in ('strong', 'b'),
                italic=italic or tag_name in ('em', 'i'),
            )


def _add_list(doc, list_tag, depth=1):
    """Add the items of a ``<ul>``/``<ol>`` tag to *doc* as list paragraphs.

    Only the direct ``<li>`` children are visited at each level and nested
    lists are handled by recursion, so a nested item is emitted exactly once
    instead of once on its own and once inside its parent's text.
    """
    base_style = 'List Bullet' if list_tag.name == 'ul' else 'List Number'
    level = min(depth, _MAX_LIST_DEPTH)
    style = base_style if level == 1 else f'{base_style} {level}'

    for item in list_tag.find_all('li', recursive=False):
        sublists = []
        parts = []
        for child in item.children:
            if getattr(child, 'name', None) in ('ul', 'ol'):
                sublists.append(child)
            elif isinstance(child, str):
                parts.append(str(child))
            else:
                parts.append(child.get_text())
        # Strip the newlines the HTML serializer puts around block children.
        doc.add_paragraph(''.join(parts).strip(), style=style)
        for sublist in sublists:
            _add_list(doc, sublist, depth + 1)


def markdown_to_word(markdown_file, word_file):
    """Convert the Markdown file at *markdown_file* into a Word document.

    Headings (h1-h6), paragraphs (with bold/italic runs) and ordered or
    unordered lists are converted; any other top-level HTML element produced
    by the Markdown renderer is skipped.

    Args:
        markdown_file: Path of the Markdown file to read (decoded as UTF-8).
        word_file: Path the generated ``.docx`` document is saved to.

    Raises:
        OSError: If the Markdown file cannot be opened.
    """
    # Reading the Markdown file
    with open(markdown_file, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    # Converting Markdown to HTML and parsing it
    html_content = markdown.markdown(markdown_content)
    soup = BeautifulSoup(html_content, 'html.parser')

    # Adding content to a new Word Document
    doc = Document()
    for element in soup.children:
        tag_name = getattr(element, 'name', None)
        if tag_name in _HEADING_LEVELS:
            doc.add_heading(element.get_text(), level=_HEADING_LEVELS[tag_name])
        elif tag_name == 'p':
            _add_inline_runs(doc.add_paragraph(), element)
        elif tag_name in ('ul', 'ol'):
            _add_list(doc, element)

    doc.save(word_file)


# Prompt only when executed as a script, so importing markdown_to_word from
# another module does not block on input() or run a conversion.
if __name__ == "__main__":
    markdown_file = input("Enter the path to the Markdown file (e.g., README.md): ")
    word_file = input("Enter the path for the output Word file (e.g., README.docx): ")

    markdown_to_word(markdown_file, word_file)
"""Convert a Markdown file to a Microsoft Word (.docx) document."""
import markdown
from docx import Document
from bs4 import BeautifulSoup

# HTML heading tag names mapped to the Word heading level they produce.
_HEADING_LEVELS = {f'h{level}': level for level in range(1, 7)}

# Deepest list level that gets its own style ('List Bullet 3' / 'List Number 3');
# deeper nesting reuses the level-3 style.
# NOTE(review): relies on the default python-docx template providing the
# '... 2' and '... 3' list styles - confirm if a custom template is introduced.
_MAX_LIST_DEPTH = 3


def _add_inline_runs(paragraph, node, bold=False, italic=False):
    """Append the inline content of *node* to *paragraph* as formatted runs.

    Children are walked recursively so nested markup such as
    ``<strong><em>text</em></strong>`` keeps both attributes, and tags with
    no special handling (links, inline code, ...) contribute their text
    rather than being passed to ``add_run`` as non-string objects.
    """
    for child in node.children:
        tag_name = getattr(child, 'name', None)
        if tag_name is None:
            # Plain text node.
            run = paragraph.add_run(str(child))
            if bold:
                run.bold = True
            if italic:
                run.italic = True
        elif tag_name == 'br':
            # Hard line break inside the paragraph.
            paragraph.add_run().add_break()
        else:
            _add_inline_runs(
                paragraph,
                child,
                bold=bold or tag_name in ('strong', 'b'),
                italic=italic or tag_name in ('em', 'i'),
            )


def _add_list(doc, list_tag, depth=1):
    """Add the items of a ``<ul>``/``<ol>`` tag to *doc* as list paragraphs.

    Only the direct ``<li>`` children are visited at each level and nested
    lists are handled by recursion, so a nested item is emitted exactly once
    instead of once on its own and once inside its parent's text.
    """
    base_style = 'List Bullet' if list_tag.name == 'ul' else 'List Number'
    level = min(depth, _MAX_LIST_DEPTH)
    style = base_style if level == 1 else f'{base_style} {level}'

    for item in list_tag.find_all('li', recursive=False):
        sublists = []
        parts = []
        for child in item.children:
            if getattr(child, 'name', None) in ('ul', 'ol'):
                sublists.append(child)
            elif isinstance(child, str):
                parts.append(str(child))
            else:
                parts.append(child.get_text())
        # Strip the newlines the HTML serializer puts around block children.
        doc.add_paragraph(''.join(parts).strip(), style=style)
        for sublist in sublists:
            _add_list(doc, sublist, depth + 1)


def markdown_to_word(markdown_file, word_file):
    """Convert the Markdown file at *markdown_file* into a Word document.

    Headings (h1-h6), paragraphs (with bold/italic runs) and ordered or
    unordered lists are converted; any other top-level HTML element produced
    by the Markdown renderer is skipped.

    Args:
        markdown_file: Path of the Markdown file to read (decoded as UTF-8).
        word_file: Path the generated ``.docx`` document is saved to.

    Raises:
        OSError: If the Markdown file cannot be opened.
    """
    # Reading the Markdown file
    with open(markdown_file, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    # Converting Markdown to HTML and parsing it
    html_content = markdown.markdown(markdown_content)
    soup = BeautifulSoup(html_content, 'html.parser')

    # Adding content to a new Word Document
    doc = Document()
    for element in soup.children:
        tag_name = getattr(element, 'name', None)
        if tag_name in _HEADING_LEVELS:
            doc.add_heading(element.get_text(), level=_HEADING_LEVELS[tag_name])
        elif tag_name == 'p':
            _add_inline_runs(doc.add_paragraph(), element)
        elif tag_name in ('ul', 'ol'):
            _add_list(doc, element)

    doc.save(word_file)


# Prompt only when executed as a script, so importing markdown_to_word from
# another module does not block on input() or run a conversion.
if __name__ == "__main__":
    markdown_file = input("Enter the path to the Markdown file (e.g., README.md): ")
    word_file = input("Enter the path for the output Word file (e.g., README.docx): ")

    markdown_to_word(markdown_file, word_file)
"""Convert a Markdown file to a Microsoft Word (.docx) document."""
import markdown
from docx import Document
from bs4 import BeautifulSoup

# HTML heading tag names mapped to the Word heading level they produce.
_HEADING_LEVELS = {f'h{level}': level for level in range(1, 7)}

# Deepest list level that gets its own style ('List Bullet 3' / 'List Number 3');
# deeper nesting reuses the level-3 style.
# NOTE(review): relies on the default python-docx template providing the
# '... 2' and '... 3' list styles - confirm if a custom template is introduced.
_MAX_LIST_DEPTH = 3


def _add_inline_runs(paragraph, node, bold=False, italic=False):
    """Append the inline content of *node* to *paragraph* as formatted runs.

    Children are walked recursively so nested markup such as
    ``<strong><em>text</em></strong>`` keeps both attributes, and tags with
    no special handling (links, inline code, ...) contribute their text
    rather than being passed to ``add_run`` as non-string objects.
    """
    for child in node.children:
        tag_name = getattr(child, 'name', None)
        if tag_name is None:
            # Plain text node.
            run = paragraph.add_run(str(child))
            if bold:
                run.bold = True
            if italic:
                run.italic = True
        elif tag_name == 'br':
            # Hard line break inside the paragraph.
            paragraph.add_run().add_break()
        else:
            _add_inline_runs(
                paragraph,
                child,
                bold=bold or tag_name in ('strong', 'b'),
                italic=italic or tag_name in ('em', 'i'),
            )


def _add_list(doc, list_tag, depth=1):
    """Add the items of a ``<ul>``/``<ol>`` tag to *doc* as list paragraphs.

    Only the direct ``<li>`` children are visited at each level and nested
    lists are handled by recursion, so a nested item is emitted exactly once
    instead of once on its own and once inside its parent's text.
    """
    base_style = 'List Bullet' if list_tag.name == 'ul' else 'List Number'
    level = min(depth, _MAX_LIST_DEPTH)
    style = base_style if level == 1 else f'{base_style} {level}'

    for item in list_tag.find_all('li', recursive=False):
        sublists = []
        parts = []
        for child in item.children:
            if getattr(child, 'name', None) in ('ul', 'ol'):
                sublists.append(child)
            elif isinstance(child, str):
                parts.append(str(child))
            else:
                parts.append(child.get_text())
        # Strip the newlines the HTML serializer puts around block children.
        doc.add_paragraph(''.join(parts).strip(), style=style)
        for sublist in sublists:
            _add_list(doc, sublist, depth + 1)


def markdown_to_word(markdown_file, word_file):
    """Convert the Markdown file at *markdown_file* into a Word document.

    Headings (h1-h6), paragraphs (with bold/italic runs) and ordered or
    unordered lists are converted; any other top-level HTML element produced
    by the Markdown renderer is skipped.

    Args:
        markdown_file: Path of the Markdown file to read (decoded as UTF-8).
        word_file: Path the generated ``.docx`` document is saved to.

    Raises:
        OSError: If the Markdown file cannot be opened.
    """
    # Reading the Markdown file
    with open(markdown_file, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    # Converting Markdown to HTML and parsing it
    html_content = markdown.markdown(markdown_content)
    soup = BeautifulSoup(html_content, 'html.parser')

    # Adding content to a new Word Document
    doc = Document()
    for element in soup.children:
        tag_name = getattr(element, 'name', None)
        if tag_name in _HEADING_LEVELS:
            doc.add_heading(element.get_text(), level=_HEADING_LEVELS[tag_name])
        elif tag_name == 'p':
            _add_inline_runs(doc.add_paragraph(), element)
        elif tag_name in ('ul', 'ol'):
            _add_list(doc, element)

    doc.save(word_file)


# Prompt only when executed as a script, so importing markdown_to_word from
# another module does not block on input() or run a conversion.
if __name__ == "__main__":
    markdown_file = input("Enter the path to the Markdown file (e.g., README.md): ")
    word_file = input("Enter the path for the output Word file (e.g., README.docx): ")

    markdown_to_word(markdown_file, word_file)
+ - Supports bold and italic text formatting. + - Converts unordered (`*`, `-`) and ordered (`1.`, `2.`) lists. + - Handles paragraphs with mixed content. + + ## Prerequisites + + You need to have Python installed on your system along with the following libraries: + + - `markdown` for converting Markdown to HTML. + - `python-docx` for creating and editing Word documents. + - `beautifulsoup4` for parsing HTML. + + You can install the required libraries using pip: + + ```bash + pip install -r requirements.txt + ``` + + To run the sample files follow the below steps: + + ```bash + python md2docx_python.py + Enter the path to the Markdown file (e.g., README.md): ..\sample_files\amazon_case_study.md + Enter the path for the output Word file (e.g., README.docx): ..\sample_files\amazon_case_study.docx + ``` + + ## Why this repo and not others ? + + Here are some reasons why this repo might be considered better or more suitable for certain use cases compared to other scripts available on the internet: + + ### 1. **Comprehensive Markdown Support** + - **Header Levels**: The script supports multiple header levels (`h1`, `h2`, `h3`), which is important for properly structuring the document. + - **Bold and Italic Text**: It handles bold (`**`) and italic (`*`) text, providing more accurate formatting in the Word document. + + ### 2. **Proper List Formatting** + - **Unordered and Ordered Lists**: The script correctly formats both unordered (`*`, `-`) and ordered lists (`1.`, `2.`) in the Word document. This ensures that lists appear as expected without additional line breaks or formatting issues. + + ### 3. **Use of Well-Supported Libraries** + - **Markdown to HTML Conversion**: Utilizes the `markdown` library, which is a widely used and reliable tool for converting Markdown to HTML. + - **HTML Parsing and Word Document Creation**: Employs `BeautifulSoup` for parsing HTML and `python-docx` for creating Word documents, both of which are robust and well-maintained libraries. 
+ + ### 4. **Simplicity and Readability** + - **Clear Code Structure**: The script is designed to be straightforward and easy to understand, making it accessible for users who may want to customize or extend it. + - **Basic Markdown Elements**: Focuses on the most commonly used Markdown elements, ensuring compatibility with a wide range of Markdown files without unnecessary complexity. + + ### 5. **Customizability** + - **Easy to Modify**: Users can easily adjust the script to handle additional Markdown features or customize the output format based on their specific needs. + - **Example Usage**: Provides a clear example of how to use the script, making it easy for users to adapt it for their own files. + + ### 6. **Minimal Dependencies** + - **Lightweight and Focused**: The script relies on only a few libraries, which reduces potential conflicts and keeps the script lightweight. + + ### 7. **Handles Basic HTML Tags** + - **Text Formatting**: Properly handles bold and italic text by interpreting HTML tags (`strong`, `em`), ensuring that formatting is preserved when converting to Word. + + ### 8. **Privacy** + - If you work at a company and convert your Markdown files to Word with an online tool, there is a chance the tool will store your files, which can leak vital company information. With this repo you can do the conversion entirely on your own system. + + ### Comparison to Other Scripts + - **Feature Set**: Some scripts may lack comprehensive support for Markdown features or may not handle lists and text formatting well. + - **Performance**: Depending on the implementation, performance might vary. This script is designed to be efficient for typical Markdown files. + - **User-Friendliness**: The clear and concise code in this script may make it more user-friendly and easier to modify compared to more complex alternatives. 
"""Convert the bundled Amazon case-study sample from Markdown to Word."""
from pathlib import Path

# The package's top-level __init__.py is empty, so markdown_to_word has to be
# imported from the module that defines it.
from md2docx_python.src.md2docx_python import markdown_to_word

# Build the paths from components rather than backslash-separated literals: in
# a normal (non-raw) string "\a" is the BEL control character, which silently
# corrupts a Windows-style path such as "...\amazon_case_study.md".
_SAMPLE_DIR = Path("md2docx_python-python") / "sample_files"

# Input is the Markdown sample; output is a new Word document next to it.
markdown_file = str(_SAMPLE_DIR / "amazon_case_study.md")
word_file = str(_SAMPLE_DIR / "amazon_case_study2.docx")

if __name__ == "__main__":
    markdown_to_word(markdown_file, word_file)