# preprocessing/extract_dialogues.py
import json


def extract_info_from_dialogue(dialogue):
    """Extract service information and utterances from a single dialogue.

    Args:
        dialogue: Mapping with a ``'services'`` list and a ``'turns'`` list.
            Every turn must provide ``'speaker'`` and ``'utterance'``; its
            optional ``'frames'`` entries must each provide ``'service'``.

    Returns:
        A dict with three keys:

        * ``'knowledge_base'`` -- ``dialogue['services']`` unchanged.
        * ``'active_services'`` -- services of frames whose state declares an
          ``active_intent`` other than ``"NONE"``, each listed once, in order
          of first appearance.
        * ``'utterances_list'`` -- one ``"<speaker>: <utterance>"`` string per
          turn, in turn order.

    Raises:
        KeyError: If a required key is missing.
    """
    turns = dialogue['turns']

    # A frame with no state (or no active_intent) yields None here.  Comparing
    # only against "NONE" would count such frames as active, so None is
    # rejected explicitly as well.
    services_in_order = (
        frame['service']
        for turn in turns
        for frame in turn.get('frames', [])
        if frame.get('state', {}).get('active_intent') not in (None, "NONE")
    )
    # dict.fromkeys removes repeats while keeping first-seen order, so the
    # same service is not reported once per frame.
    active_services = list(dict.fromkeys(services_in_order))

    utterances_list = [
        f"{turn['speaker']}: {turn['utterance']}" for turn in turns
    ]

    return {
        "knowledge_base": dialogue['services'],
        "active_services": active_services,
        "utterances_list": utterances_list,
    }


def save_extracted_dialogues(filename="dialogues.json"):
    """Write ``extracted_dialogue_<n>.txt`` for every dialogue in *filename*.

    Args:
        filename: Path of a JSON file holding a list of dialogues.
    """
    # Explicit UTF-8 keeps reading and writing independent of the platform's
    # locale encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for number, dialogue in enumerate(data, start=1):
        extracted_info = extract_info_from_dialogue(dialogue)
        output_filename = f"extracted_dialogue_{number}.txt"

        with open(output_filename, 'w', encoding='utf-8') as outfile:
            outfile.write("Knowledge Base: " + ', '.join(extracted_info['knowledge_base']) + '\n')
            outfile.write("Active Services: " + ', '.join(extracted_info['active_services']) + '\n\n')
            outfile.write('\n'.join(extracted_info['utterances_list']))

        print(f"Saved extracted data for dialogue {number} to {output_filename}")


if __name__ == "__main__":
    save_extracted_dialogues()


# preprocessing/extract_domain_knowledge.py
import json


def extract_active_frames_from_dialogue(dialogue):
    """Return the frames of *dialogue* that declare an active intent.

    Args:
        dialogue: Mapping with a ``'turns'`` list; each turn may carry a
            ``'frames'`` list.

    Returns:
        The frames, in turn order, whose ``state['active_intent']`` is present
        and different from ``"NONE"``.
    """
    # A missing state/active_intent yields None, which is rejected together
    # with the literal "NONE" so that frames without any intent are skipped.
    return [
        frame
        for turn in dialogue['turns']
        for frame in turn.get('frames', [])
        if frame.get('state', {}).get('active_intent') not in (None, "NONE")
    ]


def format_frame(frame):
    """Render one frame as human-readable, newline-terminated text.

    Args:
        frame: Mapping that may contain ``'service'``, ``'slots'`` (a list of
            mappings with ``'slot'``, ``'value'``, ``'start'`` and
            ``'exclusive_end'``) and ``'state'`` (with ``'active_intent'`` and
            ``'slot_values'``, a mapping from slot name to a list of strings).
            Missing entries are replaced by placeholder text.

    Returns:
        The formatted text: service and intent first, then one line per slot,
        then the slot values recorded in the state, if any.
    """
    state = frame.get('state', {})
    service = frame.get('service', 'Unknown Service')
    active_intent = state.get('active_intent', 'Unknown Intent')

    # Lines are collected and joined once instead of growing a string.
    lines = [f"Service: {service}", f"Intent: {active_intent}"]

    for slot in frame.get('slots', []):
        slot_name = slot.get('slot', 'Unknown Slot')
        value = slot.get('value', 'Unknown Value')
        start = slot.get('start', '??')
        end = slot.get('exclusive_end', '??')
        lines.append(f" Slot: {slot_name} -> {value} (Position: {start}-{end})")

    slot_values = state.get('slot_values', {})
    if slot_values:
        lines.append(" Slot Values:")
        lines.extend(
            f" {key} -> {', '.join(values)}" for key, values in slot_values.items()
        )

    return '\n'.join(lines) + '\n'


def save_formatted_frames(filename="dialogues.json"):
    """Write ``formatted_frames_dialogue_<n>.txt`` per dialogue with active frames.

    Dialogues without any active frame produce no file.

    Args:
        filename: Path of a JSON file holding a list of dialogues.
    """
    # Explicit UTF-8 keeps reading and writing independent of the platform's
    # locale encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    separator = '-' * 50 + '\n'  # Written after every frame.

    for number, dialogue in enumerate(data, start=1):
        extracted_frames = extract_active_frames_from_dialogue(dialogue)
        if not extracted_frames:
            continue

        formatted_text = ''.join(
            format_frame(frame) + separator for frame in extracted_frames
        )
        output_filename = f"formatted_frames_dialogue_{number}.txt"

        with open(output_filename, 'w', encoding='utf-8') as outfile:
            outfile.write(formatted_text)

        print(f"Saved formatted frames for dialogue {number} to {output_filename}")


if __name__ == "__main__":
    save_formatted_frames()
# preprocessing/extract_dialogues.py
import json


def extract_info_from_dialogue(dialogue):
    """Extract service information and utterances from a single dialogue.

    Args:
        dialogue: Mapping with a ``'services'`` list and a ``'turns'`` list.
            Every turn must provide ``'speaker'`` and ``'utterance'``; its
            optional ``'frames'`` entries must each provide ``'service'``.

    Returns:
        A dict with three keys:

        * ``'knowledge_base'`` -- ``dialogue['services']`` unchanged.
        * ``'active_services'`` -- services of frames whose state declares an
          ``active_intent`` other than ``"NONE"``, each listed once, in order
          of first appearance.
        * ``'utterances_list'`` -- one ``"<speaker>: <utterance>"`` string per
          turn, in turn order.

    Raises:
        KeyError: If a required key is missing.
    """
    turns = dialogue['turns']

    # A frame with no state (or no active_intent) yields None here.  Comparing
    # only against "NONE" would count such frames as active, so None is
    # rejected explicitly as well.
    services_in_order = (
        frame['service']
        for turn in turns
        for frame in turn.get('frames', [])
        if frame.get('state', {}).get('active_intent') not in (None, "NONE")
    )
    # dict.fromkeys removes repeats while keeping first-seen order, so the
    # same service is not reported once per frame.
    active_services = list(dict.fromkeys(services_in_order))

    utterances_list = [
        f"{turn['speaker']}: {turn['utterance']}" for turn in turns
    ]

    return {
        "knowledge_base": dialogue['services'],
        "active_services": active_services,
        "utterances_list": utterances_list,
    }


def save_extracted_dialogues(filename="dialogues.json"):
    """Write ``extracted_dialogue_<n>.txt`` for every dialogue in *filename*.

    Args:
        filename: Path of a JSON file holding a list of dialogues.
    """
    # Explicit UTF-8 keeps reading and writing independent of the platform's
    # locale encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for number, dialogue in enumerate(data, start=1):
        extracted_info = extract_info_from_dialogue(dialogue)
        output_filename = f"extracted_dialogue_{number}.txt"

        with open(output_filename, 'w', encoding='utf-8') as outfile:
            outfile.write("Knowledge Base: " + ', '.join(extracted_info['knowledge_base']) + '\n')
            outfile.write("Active Services: " + ', '.join(extracted_info['active_services']) + '\n\n')
            outfile.write('\n'.join(extracted_info['utterances_list']))

        print(f"Saved extracted data for dialogue {number} to {output_filename}")


# Run the export only when executed as a script, so importing this module
# does not touch the filesystem.
if __name__ == "__main__":
    save_extracted_dialogues()
# preprocessing/extract_domain_knowledge.py
import json


def extract_active_frames_from_dialogue(dialogue):
    """Return the frames of *dialogue* that declare an active intent.

    Args:
        dialogue: Mapping with a ``'turns'`` list; each turn may carry a
            ``'frames'`` list.

    Returns:
        The frames, in turn order, whose ``state['active_intent']`` is present
        and different from ``"NONE"``.
    """
    # A missing state/active_intent yields None, which is rejected together
    # with the literal "NONE" so that frames without any intent are skipped.
    return [
        frame
        for turn in dialogue['turns']
        for frame in turn.get('frames', [])
        if frame.get('state', {}).get('active_intent') not in (None, "NONE")
    ]


def format_frame(frame):
    """Render one frame as human-readable, newline-terminated text.

    Args:
        frame: Mapping that may contain ``'service'``, ``'slots'`` (a list of
            mappings with ``'slot'``, ``'value'``, ``'start'`` and
            ``'exclusive_end'``) and ``'state'`` (with ``'active_intent'`` and
            ``'slot_values'``, a mapping from slot name to a list of strings).
            Missing entries are replaced by placeholder text.

    Returns:
        The formatted text: service and intent first, then one line per slot,
        then the slot values recorded in the state, if any.
    """
    state = frame.get('state', {})
    service = frame.get('service', 'Unknown Service')
    active_intent = state.get('active_intent', 'Unknown Intent')

    # Lines are collected and joined once instead of growing a string.
    lines = [f"Service: {service}", f"Intent: {active_intent}"]

    for slot in frame.get('slots', []):
        slot_name = slot.get('slot', 'Unknown Slot')
        value = slot.get('value', 'Unknown Value')
        start = slot.get('start', '??')
        end = slot.get('exclusive_end', '??')
        lines.append(f" Slot: {slot_name} -> {value} (Position: {start}-{end})")

    slot_values = state.get('slot_values', {})
    if slot_values:
        lines.append(" Slot Values:")
        lines.extend(
            f" {key} -> {', '.join(values)}" for key, values in slot_values.items()
        )

    return '\n'.join(lines) + '\n'


def save_formatted_frames(filename="dialogues.json"):
    """Write ``formatted_frames_dialogue_<n>.txt`` per dialogue with active frames.

    Dialogues without any active frame produce no file.

    Args:
        filename: Path of a JSON file holding a list of dialogues.
    """
    # Explicit UTF-8 keeps reading and writing independent of the platform's
    # locale encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    separator = '-' * 50 + '\n'  # Written after every frame.

    for number, dialogue in enumerate(data, start=1):
        extracted_frames = extract_active_frames_from_dialogue(dialogue)
        if not extracted_frames:
            continue

        formatted_text = ''.join(
            format_frame(frame) + separator for frame in extracted_frames
        )
        output_filename = f"formatted_frames_dialogue_{number}.txt"

        with open(output_filename, 'w', encoding='utf-8') as outfile:
            outfile.write(formatted_text)

        print(f"Saved formatted frames for dialogue {number} to {output_filename}")


# Run the export only when executed as a script, so importing this module
# does not touch the filesystem.
if __name__ == "__main__":
    save_formatted_frames()