Commit 70f4914d authored by finn
Browse files

upload

parent 631690f3
Loading
Loading
Loading
Loading
+39 −0
Original line number Diff line number Diff line
import json

def extract_info_from_dialogue(dialogue):
    """Extract the service list, active services and utterances from one dialogue.

    Args:
        dialogue: Mapping with a ``'services'`` entry and a ``'turns'`` list.
            Every turn must provide ``'speaker'`` and ``'utterance'`` and may
            carry a ``'frames'`` list; a frame names its ``'service'`` and may
            hold a ``'state'`` mapping with an ``'active_intent'``.

    Returns:
        A dict with three keys:
            ``knowledge_base``: the dialogue's ``'services'`` value, unchanged.
            ``active_services``: service names of frames whose state has an
                active intent, each listed once in first-seen order.
            ``utterances_list``: one ``"<speaker>: <utterance>"`` string per turn.

    Raises:
        KeyError: if ``'services'``, ``'turns'``, a turn's ``'speaker'`` or
            ``'utterance'``, or an active frame's ``'service'`` is missing.
    """
    knowledge_base = dialogue['services']
    utterances_list = []

    # A dict doubles as an insertion-ordered set, so a service that stays
    # active across many turns is reported only once.
    active_services = {}

    for turn in dialogue['turns']:
        utterances_list.append(f"{turn['speaker']}: {turn['utterance']}")

        for frame in turn.get('frames') or []:
            # Frames without a state (or with a null state) carry no intent at
            # all; they must not be mistaken for active ones just because
            # None != "NONE".
            active_intent = (frame.get('state') or {}).get('active_intent')
            if active_intent is not None and active_intent != "NONE":
                active_services.setdefault(frame['service'], None)

    return {
        "knowledge_base": knowledge_base,
        "active_services": list(active_services),
        "utterances_list": utterances_list
    }

# Path of the dialogue dump to process (hard-coded; edit to point elsewhere)
filename = "dialogues.json"

# Load the data from the file. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's locale encoding.
with open(filename, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Process and save each dialogue separately (output files are numbered from 1)
for i, dialogue in enumerate(data):
    extracted_info = extract_info_from_dialogue(dialogue)
    output_filename = f"extracted_dialogue_{i + 1}.txt"

    # Format the extracted information and save it to a text file; writing as
    # UTF-8 keeps non-ASCII utterances intact regardless of the locale.
    with open(output_filename, 'w', encoding='utf-8') as outfile:
        outfile.write("Knowledge Base: " + ', '.join(extracted_info['knowledge_base']) + '\n')
        outfile.write("Active Services: " + ', '.join(extracted_info['active_services']) + '\n\n')
        outfile.write('\n'.join(extracted_info['utterances_list']))

    print(f"Saved extracted data for dialogue {i + 1} to {output_filename}")
+61 −0
Original line number Diff line number Diff line
import json


def extract_active_frames_from_dialogue(dialogue):
    """Extract frames with active intents from a single dialogue.

    Args:
        dialogue: Mapping with a ``'turns'`` list; each turn may carry a
            ``'frames'`` list of frame mappings.

    Returns:
        The frame mappings (same objects, in dialogue order) whose
        ``state['active_intent']`` is present and not ``"NONE"``.

    Raises:
        KeyError: if ``dialogue`` has no ``'turns'`` entry.
    """
    active_frames = []
    for turn in dialogue['turns']:
        for frame in turn.get('frames') or []:
            # A frame without a state (or with a null state) has no intent;
            # comparing its missing intent to "NONE" alone would wrongly keep it.
            active_intent = (frame.get('state') or {}).get('active_intent')
            if active_intent is not None and active_intent != "NONE":
                active_frames.append(frame)
    return active_frames


def format_frame(frame):
    """Render one frame as an indented, human-readable text block.

    Args:
        frame: Mapping that may contain ``'service'``, ``'slots'`` (a list of
            mappings with ``'slot'``, ``'value'``, ``'start'`` and
            ``'exclusive_end'``) and ``'state'`` (a mapping with
            ``'active_intent'`` and ``'slot_values'``). Every key is optional;
            placeholder text is used for anything missing.

    Returns:
        A newline-terminated string: a ``Service:`` line, an ``Intent:`` line,
        one ``Slot:`` line per entry in ``'slots'`` and, when the state has
        slot values, a ``Slot Values:`` section with one line per slot.
    """
    # Tolerate an explicit null state as well as a missing one.
    state = frame.get('state') or {}

    # Start with the service and intent
    service = frame.get('service', 'Unknown Service')
    active_intent = state.get('active_intent', 'Unknown Intent')
    lines = [f"Service: {service}", f"Intent: {active_intent}"]

    # Slot spans attached directly to the frame
    for slot in frame.get('slots') or []:
        slot_name = slot.get('slot', 'Unknown Slot')
        value = slot.get('value', 'Unknown Value')
        start = slot.get('start', '??')
        end = slot.get('exclusive_end', '??')
        lines.append(f"    Slot: {slot_name} -> {value} (Position: {start}-{end})")

    # Slot values tracked in the state
    slot_values = state.get('slot_values') or {}
    if slot_values:
        lines.append("    Slot Values:")
        for key, values in slot_values.items():
            # A scalar (e.g. a bare string) is treated as a single value so it
            # is not split into characters; items are stringified so numeric
            # values do not break the join.
            if not isinstance(values, (list, tuple)):
                values = [values]
            lines.append(f"        {key} -> {', '.join(map(str, values))}")

    return '\n'.join(lines) + '\n'


# Path of the dialogue dump to process (hard-coded; edit to point elsewhere)
filename = "dialogues.json"

# Load the data from the file. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's locale encoding.
with open(filename, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Process and save each dialogue's frames separately (files are numbered from 1)
for i, dialogue in enumerate(data):
    extracted_frames = extract_active_frames_from_dialogue(dialogue)

    if not extracted_frames:  # Only save if there are active frames
        continue

    # Each formatted frame is followed by a 50-dash rule to separate frames
    separator = '-' * 50 + '\n'
    formatted_text = ''.join(format_frame(frame) + separator for frame in extracted_frames)

    output_filename = f"formatted_frames_dialogue_{i + 1}.txt"

    # Save the formatted text to a file; writing as UTF-8 keeps non-ASCII slot
    # values intact regardless of the locale.
    with open(output_filename, 'w', encoding='utf-8') as outfile:
        outfile.write(formatted_text)

    print(f"Saved formatted frames for dialogue {i + 1} to {output_filename}")