# Diff-viewer residue captured together with this script (not program text):
#   Loading
#   extract_domain_knowledge/chromedriver_win32/LICENSE.chromedriver 0 → 100644 +4724 −0
#     File added. Preview size limit exceeded, changes collapsed. Show changes
#   extract_domain_knowledge/chromedriver_win32/chromedriver.exe 0 → 100644 +11.7 MiB
#     File added. No diff preview for this file type. View file
#   extract_domain_knowledge/extract_domain_knowledge.py 0 → 100644 +68 −0
#   (the capture ended with a trailing "Loading" placeholder)
#
# Script purpose: read a JSON list of dialogues, collect per service the
# intents and slot values found in the USER turns, and write the summary to
# a JSON file.
import json


def read_json_file(file_path):
    """Load and return the JSON document stored at *file_path*.

    The file is opened as UTF-8 text. Errors raised by ``open`` or
    ``json.load`` propagate to the caller.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def _summarize_dialogue(dialogue):
    """Build the summary record for one dialogue (see extract_dialogue_info)."""
    # Plain dicts are used as insertion-ordered sets (guaranteed since
    # Python 3.7). Converting a ``set`` of strings to a list gives an order
    # that changes from run to run under hash randomization, which made the
    # written JSON non-reproducible.
    services_involved = {}
    service_details = {}

    for turn in dialogue["turns"]:
        if turn["speaker"] != "USER":
            continue
        for frame in turn.get("frames", []):
            service = frame["service"]
            if "state" not in frame:
                continue
            state = frame["state"]
            intent = state.get("active_intent", "NONE")
            slot_values = state.get("slot_values", {})

            # Skip frames that carry neither an intent nor any slot value.
            if intent == "NONE" and not slot_values:
                continue

            services_involved[service] = None
            details = service_details.setdefault(service, {
                "Service Type": service,
                "Actions": {},
                "Preferences": {},
            })
            # "NONE" is still recorded as an action when the frame has slot
            # values; that matches the behavior callers already see.
            details["Actions"][intent] = None
            for slot, values in slot_values.items():
                # A bare string is a single value; iterating it would add
                # one entry per character.
                if isinstance(values, str):
                    values = [values]
                details["Preferences"].setdefault(slot, {}).update(
                    dict.fromkeys(values)
                )

    return {
        "Dialogue ID": dialogue['dialogue_id'],
        "Services Involved": list(services_involved),
        "Service Details": [
            {
                "Service Type": details["Service Type"],
                "Actions": list(details["Actions"]),
                "Preferences": {
                    slot: list(values)
                    for slot, values in details["Preferences"].items()
                },
            }
            for details in service_details.values()
        ],
    }


def extract_dialogue_info(dialogues):
    """Summarize, per dialogue, what the user asked of each service.

    Only turns whose ``"speaker"`` is ``"USER"`` are inspected, and within
    them only frames that have a ``"state"`` entry. A frame whose
    ``active_intent`` is ``"NONE"`` (or absent) and whose ``slot_values``
    are empty is ignored.

    Args:
        dialogues: Iterable of dialogue dicts. Each needs ``"dialogue_id"``
            and ``"turns"``; each turn needs ``"speaker"`` and may have
            ``"frames"``; each frame needs ``"service"``.

    Returns:
        A list with one dict per dialogue, holding ``"Dialogue ID"``,
        ``"Services Involved"`` and ``"Service Details"``. Every detail
        entry has ``"Service Type"``, ``"Actions"`` (distinct intents) and
        ``"Preferences"`` (slot name -> distinct values). All lists are in
        first-seen order and contain no duplicates.

    Raises:
        KeyError: If one of the required keys listed above is missing.
    """
    return [_summarize_dialogue(dialogue) for dialogue in dialogues]


def write_to_file(filename, data):
    """Serialize *data* as JSON (4-space indent) into *filename* as UTF-8."""
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)


# Default input and output locations used when the file is run as a script.
file_path = 'data/multiwoz_2.2/train/dialogues_001 (1).json'
output_file = 'formatted_dialogues.json'

# Guarded so that importing this module no longer reads or writes files.
if __name__ == "__main__":
    dialogues = read_json_file(file_path)
    formatted_dialogues = extract_dialogue_info(dialogues)
    write_to_file(output_file, formatted_dialogues)
extract_domain_knowledge/chromedriver_win32/LICENSE.chromedriver 0 → 100644 +4724 −0 File added. Preview size limit exceeded, changes collapsed. Show changes
extract_domain_knowledge/chromedriver_win32/chromedriver.exe 0 → 100644 +11.7 MiB File added. No diff preview for this file type. View file
# Diff-viewer header captured together with this script (not program text):
#   extract_domain_knowledge/extract_domain_knowledge.py 0 → 100644 +68 −0
#   Original line number / Diff line number / Diff line
#
# Script purpose: read a JSON list of dialogues, collect per service the
# intents and slot values found in the USER turns, and write the summary to
# a JSON file.
import json


def read_json_file(file_path):
    """Load and return the JSON document stored at *file_path*.

    The file is opened as UTF-8 text. Errors raised by ``open`` or
    ``json.load`` propagate to the caller.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def _summarize_dialogue(dialogue):
    """Build the summary record for one dialogue (see extract_dialogue_info)."""
    # Plain dicts are used as insertion-ordered sets (guaranteed since
    # Python 3.7). Converting a ``set`` of strings to a list gives an order
    # that changes from run to run under hash randomization, which made the
    # written JSON non-reproducible.
    services_involved = {}
    service_details = {}

    for turn in dialogue["turns"]:
        if turn["speaker"] != "USER":
            continue
        for frame in turn.get("frames", []):
            service = frame["service"]
            if "state" not in frame:
                continue
            state = frame["state"]
            intent = state.get("active_intent", "NONE")
            slot_values = state.get("slot_values", {})

            # Skip frames that carry neither an intent nor any slot value.
            if intent == "NONE" and not slot_values:
                continue

            services_involved[service] = None
            details = service_details.setdefault(service, {
                "Service Type": service,
                "Actions": {},
                "Preferences": {},
            })
            # "NONE" is still recorded as an action when the frame has slot
            # values; that matches the behavior callers already see.
            details["Actions"][intent] = None
            for slot, values in slot_values.items():
                # A bare string is a single value; iterating it would add
                # one entry per character.
                if isinstance(values, str):
                    values = [values]
                details["Preferences"].setdefault(slot, {}).update(
                    dict.fromkeys(values)
                )

    return {
        "Dialogue ID": dialogue['dialogue_id'],
        "Services Involved": list(services_involved),
        "Service Details": [
            {
                "Service Type": details["Service Type"],
                "Actions": list(details["Actions"]),
                "Preferences": {
                    slot: list(values)
                    for slot, values in details["Preferences"].items()
                },
            }
            for details in service_details.values()
        ],
    }


def extract_dialogue_info(dialogues):
    """Summarize, per dialogue, what the user asked of each service.

    Only turns whose ``"speaker"`` is ``"USER"`` are inspected, and within
    them only frames that have a ``"state"`` entry. A frame whose
    ``active_intent`` is ``"NONE"`` (or absent) and whose ``slot_values``
    are empty is ignored.

    Args:
        dialogues: Iterable of dialogue dicts. Each needs ``"dialogue_id"``
            and ``"turns"``; each turn needs ``"speaker"`` and may have
            ``"frames"``; each frame needs ``"service"``.

    Returns:
        A list with one dict per dialogue, holding ``"Dialogue ID"``,
        ``"Services Involved"`` and ``"Service Details"``. Every detail
        entry has ``"Service Type"``, ``"Actions"`` (distinct intents) and
        ``"Preferences"`` (slot name -> distinct values). All lists are in
        first-seen order and contain no duplicates.

    Raises:
        KeyError: If one of the required keys listed above is missing.
    """
    return [_summarize_dialogue(dialogue) for dialogue in dialogues]


def write_to_file(filename, data):
    """Serialize *data* as JSON (4-space indent) into *filename* as UTF-8."""
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)


# Default input and output locations used when the file is run as a script.
file_path = 'data/multiwoz_2.2/train/dialogues_001 (1).json'
output_file = 'formatted_dialogues.json'

# Guarded so that importing this module no longer reads or writes files.
if __name__ == "__main__":
    dialogues = read_json_file(file_path)
    formatted_dialogues = extract_dialogue_info(dialogues)
    write_to_file(output_file, formatted_dialogues)