Skip to content

Commit 7895d79

Browse files
committed
New actions added
1 parent 2057181 commit 7895d79

File tree

10 files changed

+449
-8
lines changed

10 files changed

+449
-8
lines changed

‎logs/logs.txt

Lines changed: 201 additions & 7 deletions
Large diffs are not rendered by default.
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
from docx import Document
2+
from docx.shared import Inches
3+
4+
from playground.actions_manager import agent_action
5+
6+
import zipfile
7+
import os
8+
9+
from playground.global_values import GlobalValues
10+
11+
12+
@agent_action
def extract_text_from_doc(doc_filename):
    """
    Read a Word document and return its paragraph text as one string.

    Parameters:
    doc_filename (str): Name of the Word document (.docx); it is joined onto
        GlobalValues.ASSISTANTS_WORKING_FOLDER to build the path that is opened.

    Returns:
    str: The text of each paragraph in `doc.paragraphs`, separated by newlines.
    """
    source_path = os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    document = Document(source_path)
    return "\n".join(para.text for para in document.paragraphs)
29+
30+
31+
@agent_action
def extract_all_from_doc(doc_filename, output_folder):
    """
    Extract all text, images, and code from a Word document.

    Paragraphs containing a ``` fence are treated as code: the text following
    the first fence (up to the next fence, if any) is written to
    `code_block_<n>.py` in the output folder and replaced in the returned text
    by a `[CODE BLOCK <n>]` placeholder. Runs that embed a picture add an
    ` [image<n>.png] ` placeholder to the text. Every archive member under
    `word/media/` is copied into the output folder under its original file name.

    Parameters:
    doc_filename (str): Name of the Word document (.docx), joined onto
        GlobalValues.ASSISTANTS_WORKING_FOLDER.
    output_folder (str): Directory (joined onto the same working folder) in
        which the extracted images and code files are saved. It is created if
        it does not exist.

    Returns:
    tuple: `(full_document_text, image_paths, code_paths)` where
        full_document_text (str) is the newline-joined text with placeholders,
        image_paths (list) holds the paths of the extracted media files, and
        code_paths (list) holds the paths of the written code files.
    """
    working_folder = GlobalValues.ASSISTANTS_WORKING_FOLDER

    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    output_folder = os.path.join(working_folder, output_folder)
    os.makedirs(output_folder, exist_ok=True)

    # Open the document
    doc_path = os.path.join(working_folder, doc_filename)
    doc = Document(doc_path)

    full_text = []
    image_paths = []
    code_paths = []
    image_counter = 1
    code_counter = 1

    # Walk the paragraphs, swapping code and pictures for placeholders
    for paragraph in doc.paragraphs:
        paragraph_text = paragraph.text
        if "```" in paragraph_text:
            # Index 1 always exists because the fence is present at least once.
            code_block = paragraph_text.split("```")[1]
            code_filename = os.path.join(output_folder, f"code_block_{code_counter}.py")
            # Explicit UTF-8 so non-ASCII code does not depend on the
            # platform's default encoding.
            with open(code_filename, "w", encoding="utf-8") as code_file:
                code_file.write(code_block)
            code_paths.append(code_filename)
            full_text.append(f"[CODE BLOCK {code_counter}]")
            code_counter += 1
        else:
            full_text.append(paragraph_text)

        for run in paragraph.runs:
            # One placeholder per run that references an embedded picture.
            if "drawing" in run._element.xml and run._element.xpath(
                ".//a:blip/@r:embed"
            ):
                full_text.append(f" [image{image_counter}.png] ")
                image_counter += 1

    # A .docx is a zip archive; copy out everything stored under word/media/
    with zipfile.ZipFile(doc_path, "r") as docx_zip:
        for member in docx_zip.namelist():
            if not member.startswith("word/media/"):
                continue
            image_filename = os.path.basename(member)
            if not image_filename:
                # Directory entry (name ends with "/"): nothing to write.
                continue
            image_path = os.path.join(output_folder, image_filename)
            with open(image_path, "wb") as image_file:
                image_file.write(docx_zip.read(member))
            image_paths.append(image_path)

    full_document_text = "\n".join(full_text)
    return full_document_text, image_paths, code_paths
97+
98+
99+
@agent_action
def create_word_doc(doc_filename):
    """
    Create an empty Word document and save it.

    Parameters:
    doc_filename (str): Name of the document to create; it is joined onto
        GlobalValues.ASSISTANTS_WORKING_FOLDER to build the save path.

    Returns:
    str: A fixed confirmation message.
    """
    new_document = Document()
    new_document.save(
        os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    )
    return "Document created successfully."
111+
112+
113+
@agent_action
def append_paragraph_to_word_doc(doc_filename, paragraph_text):
    """
    Open an existing Word document, add a paragraph at the end and save it.

    Parameters:
    doc_filename (str): Name of the Word document, joined onto
        GlobalValues.ASSISTANTS_WORKING_FOLDER.
    paragraph_text (str): Text of the paragraph to add.

    Returns:
    str: A fixed confirmation message.
    """
    # The same path is used for loading and for saving back in place.
    target_path = os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    document = Document(target_path)
    document.add_paragraph(paragraph_text)
    document.save(target_path)
    return "Paragraph added successfully."
128+
129+
130+
@agent_action
def append_heading_to_word_doc(doc_filename, heading_text, level):
    """
    Open an existing Word document, add a heading at the end and save it.

    Parameters:
    doc_filename (str): Name of the Word document, joined onto
        GlobalValues.ASSISTANTS_WORKING_FOLDER.
    heading_text (str): Text of the heading to add.
    level (int): The heading level (0-9). The value is passed through `int()`
        first, so a numeric string is accepted as well.

    Returns:
    str: A fixed confirmation message.
    """
    heading_level = int(level)
    # The same path is used for loading and for saving back in place.
    target_path = os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    document = Document(target_path)
    document.add_heading(heading_text, level=heading_level)
    document.save(target_path)
    return "Heading added successfully."
147+
148+
149+
def normalize_path(path):
    """
    Return `path` normalized with `os.path.normpath`.

    Redundant separators and '.' / '..' segments are collapsed where possible;
    the path is not made absolute and the file system is not consulted.

    Parameters:
    path (str): The path to normalize.

    Returns:
    str: The normalized path.
    """
    normalized = os.path.normpath(path)
    return normalized
160+
161+
162+
@agent_action
def append_image_to_word_doc(doc_filename, image_filename, caption_text):
    """
    Append an image with a caption to a Word document.

    The picture is inserted 5 inches wide and centered, followed by a centered
    paragraph holding the caption in the "Caption" style. The document is then
    saved back to the path it was loaded from.

    Parameters:
    doc_filename (str): Name of the Word document, joined onto
        GlobalValues.ASSISTANTS_WORKING_FOLDER.
    image_filename (str): Path of the image file to append. It is normalized
        and joined onto the same working folder.
    caption_text (str): The caption text to append below the image.

    Returns:
    str: A fixed confirmation message.
    """
    # Imported locally so this block carries its own dependency; the named
    # enum member replaces the bare integer 1 previously used for centering.
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    working_folder = GlobalValues.ASSISTANTS_WORKING_FOLDER
    doc_path = os.path.join(working_folder, doc_filename)
    doc = Document(doc_path)

    # NOTE: os.path.join discards `working_folder` when the second argument is
    # an absolute path, so an absolute image_filename is used as given.
    image_path = os.path.join(working_folder, normalize_path(image_filename))
    doc.add_picture(image_path, width=Inches(5.0))

    # The picture was just added at the end, so the last paragraph holds it.
    picture_paragraph = doc.paragraphs[-1]
    picture_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    caption = doc.add_paragraph(caption_text)
    caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
    caption.style = "Caption"

    doc.save(doc_path)
    return "Image added successfully."
186+
187+
188+
@agent_action
def append_code_to_word_doc(doc_filename, code_text):
    """
    Open an existing Word document, add a code block at the end and save it.

    The code is added as a single run in a new paragraph whose font name is
    set to "Courier New".

    Parameters:
    doc_filename (str): Name of the Word document, joined onto
        GlobalValues.ASSISTANTS_WORKING_FOLDER.
    code_text (str): The code text to append.

    Returns:
    str: A fixed confirmation message.
    """
    # The same path is used for loading and for saving back in place.
    target_path = os.path.join(GlobalValues.ASSISTANTS_WORKING_FOLDER, doc_filename)
    document = Document(target_path)
    code_run = document.add_paragraph().add_run(code_text)
    code_run.font.name = "Courier New"
    document.save(target_path)
    return "Code block added successfully."
205+
206+
207+
# # Usage example:
208+
# # Create a new document first
209+
# doc = Document()
210+
# doc.save('example.docx')
211+
212+
# # Append content to the document
213+
# append_paragraph_to_word_doc('example.docx', 'This is a sample paragraph.')
214+
# append_heading_to_word_doc('example.docx', 'Sample Heading', 2)
215+
# append_image_to_word_doc('example.docx', '/path/to/image.png', 'Sample Image Caption')
216+
# append_code_to_word_doc('example.docx', 'print("Hello, world!")')
217+
218+
# print("Content added to the document successfully.")

‎playground/assistants_api.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ def create_thread_message(self, thread_id, role, content, attachments=None):
2525
content=content,
2626
attachments=attachments,
2727
)
28+
29+
def get_threads(self):
    """
    Placeholder for listing threads; currently always raises.

    Raises:
    NotImplementedError: Always, until a real implementation is supplied.
    """
    # NOTE(review): the committed statement was truncated
    # (`return self.client.beta.threads.` with a dangling dot), which is a
    # syntax error and prevents this module from being imported. The intended
    # call cannot be determined from this file, and the OpenAI Python client
    # does not appear to expose a list operation on `beta.threads` - confirm
    # against the API reference before implementing.
    raise NotImplementedError("get_threads is not implemented yet.")
2831

2932
def create_assistant(
3033
self,

‎playground/assistants_utils.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,10 @@ def handle_requires_action(self, data, run_id):
142142
for tool in data.required_action.submit_tool_outputs.tool_calls:
143143
if tool.function.name in self.action_manager.get_action_names():
144144
action = self.action_manager.get_action(tool.function.name)
145+
print(f"action: {tool.function.name} -> {action}")
145146
if action:
146147
args = json.loads(tool.function.arguments)
148+
print(f"action: {tool.function.name} -> {args}")
147149
output = action["pointer"](**args)
148150

149151
if hasattr(output, "data"):
@@ -176,4 +178,11 @@ def submit_tool_outputs(self, tool_outputs, run_id):
176178
for text in stream.text_deltas:
177179
self.output_queue.put(("text", text))
178180
except Exception as e:
179-
self.output_queue.put(("text", f"Error in tool outputs: {str(e)}"))
181+
msg = f"Run cancelled with error in tool outputs: {str(e)}"
182+
self.output_queue.put(("text", msg))
183+
client.beta.threads.runs.cancel(run_id=self.current_run.id)
184+
client.beta.threads.messages.create(
185+
thread_id=self.current_run.thread_id,
186+
role="assistant",
187+
content=msg,
188+
)

‎requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ pytest
77
youtube-transcript-api
88
py-trees
99
python-pptx
10+
python-docx
1011
markdown
1112
moviepy
1213
opencv-python

‎tests/test_data/images/image.png

3.02 MB
Loading

‎tests/test_data/images/image1.png

3.02 MB
Loading

‎tests/test_data/images/image2.png

3.02 MB
Loading

‎tests/test_data/test_document.docx

4.14 MB
Binary file not shown.

‎tests/test_word_actions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from playground.assistant_actions.word_actions import extract_all_from_doc
2+
from playground.global_values import GlobalValues
3+
import os
4+
5+
# Paths are resolved relative to the directory containing this test file.
# dirname() is used instead of string-replacing the file name out of __file__,
# which breaks if the file is renamed or if the name occurs elsewhere in the path.
path = os.path.dirname(os.path.abspath(__file__))
GlobalValues.set_value("ASSISTANTS_WORKING_FOLDER", os.path.join(path, "test_data"))
8+
9+
10+
def test_extract_all_from_doc():
    """Extract the sample document and check the image, text and code counts."""
    document_text, extracted_images, extracted_code = extract_all_from_doc(
        "test_document.docx", "images"
    )
    assert len(extracted_images) == 2
    assert len(document_text) > 0
    assert len(extracted_code) == 0

0 commit comments

Comments
 (0)