|
| 1 | +from docx import Document |
| 2 | +from docx.shared import Inches |
| 3 | + |
| 4 | +from playground.actions_manager import agent_action |
| 5 | + |
| 6 | +import zipfile |
| 7 | +import os |
| 8 | + |
| 9 | +from playground.global_values import GlobalValues |
| 10 | + |
| 11 | + |
@agent_action
def extract_text_from_doc(doc_filename):
    """
    Return the text of every paragraph in a Word document.

    Parameters:
        doc_filename (str): Name of the Word document (.docx), resolved
            relative to GlobalValues.ASSISTANTS_WORKING_FOLDER.

    Returns:
        str: The text of each entry in the document's ``paragraphs``
            collection, joined with newline characters.
    """
    source_path = os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    document = Document(source_path)
    return "\n".join(para.text for para in document.paragraphs)
| 29 | + |
| 30 | + |
@agent_action
def extract_all_from_doc(doc_filename, output_folder):
    """
    Extract all text, images, and code from a Word document.

    Both ``doc_filename`` and ``output_folder`` are resolved relative to
    GlobalValues.ASSISTANTS_WORKING_FOLDER.

    A paragraph whose text contains a ``` fence is treated as a code block:
    the text following the first fence (up to the next fence, if any) is
    written to ``code_block_<n>.py`` and the paragraph is represented in the
    returned text by ``[CODE BLOCK <n>]``. Any other paragraph contributes
    its text unchanged. Each run that embeds a picture adds an
    `` [image<n>.png] `` placeholder after its paragraph's entry.

    Parameters:
        doc_filename (str): Path to the Word document (.docx).
        output_folder (str): Directory to save the extracted images and code
            files. It is created if it does not exist.

    Returns:
        tuple: ``(full_document_text, image_paths, code_paths)`` where
            full_document_text (str) is the document text with placeholders
            for images and code, image_paths (list) holds the paths of the
            extracted images, and code_paths (list) holds the paths of the
            extracted code files.
    """
    working_folder = GlobalValues.ASSISTANTS_WORKING_FOLDER

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    output_folder = os.path.join(working_folder, output_folder)
    os.makedirs(output_folder, exist_ok=True)

    doc_path = os.path.join(working_folder, doc_filename)
    doc = Document(doc_path)

    full_text = []
    image_paths = []
    code_paths = []
    image_counter = 1
    code_counter = 1

    for paragraph in doc.paragraphs:
        paragraph_text = paragraph.text
        if "```" in paragraph_text:
            # The fence is known to be present, so split() yields at least
            # two pieces and index 1 is the text right after the first fence.
            # NOTE(review): text outside the fences is dropped from the
            # returned document text; confirm that this is intended.
            code_block = paragraph_text.split("```")[1]
            code_filename = os.path.join(output_folder, f"code_block_{code_counter}.py")
            # Explicit UTF-8 so non-ASCII code text does not depend on the
            # platform's default encoding.
            with open(code_filename, "w", encoding="utf-8") as code_file:
                code_file.write(code_block)
            code_paths.append(code_filename)
            full_text.append(f"[CODE BLOCK {code_counter}]")
            code_counter += 1
        else:
            full_text.append(paragraph_text)

        for run in paragraph.runs:
            if "drawing" in run._element.xml:
                # A non-empty result means the run references an embedded picture.
                if run._element.xpath(".//a:blip/@r:embed"):
                    # NOTE(review): the placeholder name is a running counter
                    # and is not derived from the media file names written
                    # below, so the two may not match.
                    full_text.append(f" [image{image_counter}.png] ")
                    image_counter += 1

    # Copy every media file stored inside the .docx archive to output_folder.
    with zipfile.ZipFile(doc_path, "r") as docx_zip:
        for member in docx_zip.namelist():
            if not member.startswith("word/media/"):
                continue
            image_filename = os.path.basename(member)
            if not image_filename:
                # A directory entry such as "word/media/" has no file name;
                # opening output_folder itself for writing would raise.
                continue
            image_path = os.path.join(output_folder, image_filename)
            with open(image_path, "wb") as image_file:
                image_file.write(docx_zip.read(member))
            image_paths.append(image_path)

    full_document_text = "\n".join(full_text)
    return full_document_text, image_paths, code_paths
| 97 | + |
| 98 | + |
@agent_action
def create_word_doc(doc_filename):
    """
    Create an empty Word document and save it.

    Parameters:
        doc_filename (str): Name of the document to create, resolved
            relative to GlobalValues.ASSISTANTS_WORKING_FOLDER.

    Returns:
        str: A confirmation message.
    """
    blank_document = Document()
    blank_document.save(
        os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    )
    return "Document created successfully."
| 111 | + |
| 112 | + |
@agent_action
def append_paragraph_to_word_doc(doc_filename, paragraph_text):
    """
    Add a paragraph at the end of an existing Word document and save it.

    Parameters:
        doc_filename (str): Name of the Word document, resolved relative to
            GlobalValues.ASSISTANTS_WORKING_FOLDER.
        paragraph_text (str): The text of the new paragraph.

    Returns:
        str: A confirmation message.
    """
    # The same path is used for both loading and saving the document.
    target_path = os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    document = Document(target_path)
    document.add_paragraph(paragraph_text)
    document.save(target_path)
    return "Paragraph added successfully."
| 128 | + |
| 129 | + |
@agent_action
def append_heading_to_word_doc(doc_filename, heading_text, level):
    """
    Add a heading at the end of an existing Word document and save it.

    Parameters:
        doc_filename (str): Name of the Word document, resolved relative to
            GlobalValues.ASSISTANTS_WORKING_FOLDER.
        heading_text (str): The text of the new heading.
        level (int): The heading level (0-9). The value is passed through
            ``int()``, so a numeric string is accepted as well.

    Returns:
        str: A confirmation message.
    """
    # Convert first so an invalid level fails before the document is opened.
    heading_level = int(level)
    target_path = os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    document = Document(target_path)
    document.add_heading(heading_text, level=heading_level)
    document.save(target_path)
    return "Heading added successfully."
| 147 | + |
| 148 | + |
def normalize_path(path):
    """
    Return ``path`` in normalized form, as produced by ``os.path.normpath``.

    Parameters:
        path (str): The path to normalize.

    Returns:
        str: The normalized path, with reference indicators such as '.'
            and '..' collapsed where possible.
    """
    normalized = os.path.normpath(path)
    return normalized
| 160 | + |
| 161 | + |
@agent_action
def append_image_to_word_doc(doc_filename, image_filename, caption_text):
    """
    Add a centered image followed by a centered caption to a Word document.

    Parameters:
        doc_filename (str): Name of the Word document, resolved relative to
            GlobalValues.ASSISTANTS_WORKING_FOLDER.
        image_filename (str): Path of the image file; it is normalized and
            then joined onto GlobalValues.ASSISTANTS_WORKING_FOLDER.
        caption_text (str): The caption placed below the image, using the
            "Caption" style.

    Returns:
        str: A confirmation message.
    """
    working_folder = GlobalValues.ASSISTANTS_WORKING_FOLDER
    target_path = os.path.join(working_folder, doc_filename)
    document = Document(target_path)

    picture_path = os.path.join(working_folder, normalize_path(image_filename))
    document.add_picture(picture_path, width=Inches(5.0))

    # The picture sits in the last paragraph; alignment value 1 centers it.
    picture_paragraph = document.paragraphs[-1]
    picture_paragraph.alignment = 1

    caption_paragraph = document.add_paragraph(caption_text)
    caption_paragraph.alignment = 1  # Centered, like the picture above it
    caption_paragraph.style = "Caption"

    document.save(target_path)
    return "Image added successfully."
| 186 | + |
| 187 | + |
@agent_action
def append_code_to_word_doc(doc_filename, code_text):
    """
    Add a code block, set in Courier New, to a Word document and save it.

    Parameters:
        doc_filename (str): Name of the Word document, resolved relative to
            GlobalValues.ASSISTANTS_WORKING_FOLDER.
        code_text (str): The code text to append.

    Returns:
        str: A confirmation message.
    """
    target_path = os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    document = Document(target_path)
    # The code goes into a single run of a new paragraph so the font
    # applies only to that text.
    code_run = document.add_paragraph().add_run(code_text)
    code_run.font.name = "Courier New"
    document.save(target_path)
    return "Code block added successfully."
| 205 | + |
| 206 | + |
# # Usage example (file names are resolved relative to
# # GlobalValues.ASSISTANTS_WORKING_FOLDER):
# # Create a new document first
# create_word_doc('example.docx')

# # Append content to the document
# append_paragraph_to_word_doc('example.docx', 'This is a sample paragraph.')
# append_heading_to_word_doc('example.docx', 'Sample Heading', 2)
# append_image_to_word_doc('example.docx', '/path/to/image.png', 'Sample Image Caption')
# append_code_to_word_doc('example.docx', 'print("Hello, world!")')

# print("Content added to the document successfully.")
0 commit comments