import tkinter as tk
from tkinter import filedialog, messagebox, ttk, Toplevel
import os
import fitz  # PyMuPDF
import re

class MmpiExtractorApp:
    def __init__(self, root):
        """Build the main window: toolbar, file list, status area and start button.

        Args:
            root: The Tk root window that hosts the application widgets.
        """
        self.root = root
        root.title("MMPI-2 데이터 추출기 v2.3 (중복 해결)")
        root.geometry("700x500")
        root.configure(bg="#f0f2f5")

        # Look-and-feel values shared by the widgets created below.
        self.font_main = ("Segoe UI", 10)
        self.font_status = ("Segoe UI", 9)
        self.bg_color = "#f0f2f5"
        self.btn_color = "#ffffff"
        self.btn_accent_color = "#e0e8ff"

        # Application state: selected PDF paths and the extracted text per name.
        self.file_paths = []
        self.results = {}

        main_frame = tk.Frame(self.root, padx=15, pady=15, bg=self.bg_color)
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Toolbar row with the "select files" and "clear list" buttons.
        toolbar = tk.Frame(main_frame, bg=self.bg_color)
        toolbar.pack(fill=tk.X, pady=(0, 10))
        toolbar_button_style = {
            "font": self.font_main,
            "bg": self.btn_color,
            "relief": "flat",
            "padx": 10,
        }
        self.add_file_button = tk.Button(
            toolbar,
            text="PDF 파일 선택",
            command=self.add_files,
            **toolbar_button_style,
        )
        self.add_file_button.pack(side=tk.LEFT)
        self.clear_button = tk.Button(
            toolbar,
            text="목록 지우기",
            command=self.clear_list,
            **toolbar_button_style,
        )
        self.clear_button.pack(side=tk.LEFT, padx=(10, 0))

        # Scrollable list showing the base names of the selected files.
        list_area = tk.Frame(main_frame, bg=self.bg_color)
        list_area.pack(fill=tk.BOTH, expand=True)
        self.listbox = tk.Listbox(
            list_area,
            font=self.font_main,
            relief="solid",
            bd=1,
            bg="#ffffff",
        )
        self.listbox.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        list_scroll = tk.Scrollbar(
            list_area,
            orient="vertical",
            command=self.listbox.yview,
            relief="flat",
        )
        list_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.listbox.config(yscrollcommand=list_scroll.set)

        # Status line and progress bar.
        status_area = tk.Frame(main_frame, bg=self.bg_color)
        status_area.pack(fill=tk.X, pady=(10, 0))
        self.status_label = tk.Label(
            status_area,
            text="MMPI-2 PDF 보고서 파일을 선택하세요.",
            font=self.font_status,
            anchor="w",
            bg=self.bg_color,
            fg="#555555",
        )
        self.status_label.pack(fill=tk.X)
        self.progress = ttk.Progressbar(
            status_area,
            orient="horizontal",
            length=100,
            mode="determinate",
        )
        self.progress.pack(fill=tk.X, pady=(5, 0))

        # Main action button; uses the main font family in a larger bold size.
        start_font = (self.font_main[0], 11, 'bold')
        self.extract_button = tk.Button(
            main_frame,
            text="데이터 추출 시작",
            command=self.start_extraction,
            font=start_font,
            height=2,
            bg=self.btn_accent_color,
            relief="flat",
        )
        self.extract_button.pack(fill=tk.X, pady=(15, 0))

    def add_files(self):
        new_files = filedialog.askopenfilenames(title="MMPI-2 보고서 PDF 파일을 선택하세요", filetypes=[("PDF files", "*.pdf")])
        if new_files:
            for f in new_files:
                if f not in self.file_paths:
                    self.file_paths.append(f)
                    self.listbox.insert(tk.END, os.path.basename(f))
            self.update_status(f"{len(self.file_paths)}개의 파일이 선택되었습니다.")

    def clear_list(self):
        self.file_paths.clear()
        self.listbox.delete(0, tk.END)
        self.results = {}
        self.update_status("파일을 선택하세요.")

    def update_status(self, message):
        self.status_label.config(text=message)
        self.root.update_idletasks()

    def extract_name(self, page):
        try:
            text = page.get_text("text")
            match = re.search(r"(?:이름|성명)\s*:\s*(\S+)", text)
            if match: return match.group(1)
            blocks = page.get_text("blocks")
            if blocks: return blocks[0][4].strip().split('\n')[0]
        except Exception: pass
        return "이름모름"

    def reformat_numeric_blocks(self, text):
        """Join stand-alone numeric lines onto the non-blank line before them.

        A line counts as numeric when, after stripping surrounding
        whitespace, it is a plain decimal number: optional sign, digits, and
        an optional decimal point (``50``, ``-1.5``, ``.5``, ``3.``). Such a
        line is appended, separated by one space, to the previous output
        line unless that line is blank. The first line is always kept as is.

        A strict pattern is used instead of ``float()`` because ``float()``
        also accepts tokens such as ``inf``, ``nan``, ``1e3`` and ``1_0``,
        which would wrongly be merged into the preceding line.

        Args:
            text: Newline-separated text to reformat.

        Returns:
            The reformatted text, joined with ``"\\n"``.
        """
        is_plain_number = re.compile(r"[+-]?(?:\d+\.?\d*|\.\d+)").fullmatch

        lines = text.split('\n')
        merged = [lines[0]]
        for line in lines[1:]:
            token = line.strip()
            if is_plain_number(token) and merged[-1].strip():
                merged[-1] += f" {token}"
            else:
                # Keep non-numeric lines (and numbers following a blank
                # line) untouched, including their original whitespace.
                merged.append(line)
        return "\n".join(merged)

    def start_extraction(self):
        """Extract, clean and display the text of every selected PDF report.

        For each selected file the text of the pages at zero-based indexes
        1-4 is concatenated and cleaned, then stored in ``self.results``
        under the name read from the first page. The results window is
        opened when at least one file produced output.

        Files with too few pages are skipped and counted; the count is
        appended to the final status message. Each document is closed as
        soon as it has been processed, including when it is skipped or when
        an error occurs. Any exception raised during processing is shown in
        an error dialog, and the progress bar is always reset at the end.

        Returns:
            None.
        """
        if not self.file_paths:
            messagebox.showwarning("경고", "추출할 PDF 파일을 먼저 선택해주세요.")
            return

        total_files = len(self.file_paths)
        self.progress["maximum"] = total_files
        self.progress["value"] = 0
        self.results = {}
        pages_to_extract = [1, 2, 3, 4]
        min_page_count = max(pages_to_extract) + 1
        skipped = 0
        try:
            for i, file_path in enumerate(self.file_paths):
                filename = os.path.basename(file_path)
                self.update_status(f"({i+1}/{total_files}) 처리 중: {filename}")

                # The context manager closes the document on every path.
                with fitz.open(file_path) as doc:
                    if doc.page_count < min_page_count:
                        skipped += 1
                    else:
                        name = self._unique_result_name(
                            self.extract_name(doc.load_page(0)), i + 1
                        )
                        raw_text = "".join(
                            doc.load_page(page_num).get_text("text") + "\n"
                            for page_num in pages_to_extract
                        )
                        self.results[name] = self._clean_extracted_text(raw_text)

                # Advance the bar for skipped files too, so it reaches 100%.
                self.progress["value"] = i + 1
                self.root.update_idletasks()

            if not self.results:
                messagebox.showinfo("완료", "처리할 파일이 없습니다.")
                return
            self.show_results_window()
            status = "추출 완료. 결과 창을 확인하세요."
            if skipped:
                status += f" (5쪽 미만 파일 {skipped}개 건너뜀)"
            self.update_status(status)
        except Exception as e:
            # Top-level UI boundary: report the failure instead of crashing.
            messagebox.showerror("오류 발생", f"작업 중 오류가 발생했습니다:\n{e}")
        finally:
            self.progress["value"] = 0

    def _unique_result_name(self, name, file_number):
        """Return *name*, suffixed if needed so it is not yet a key of ``self.results``."""
        if name not in self.results:
            return name
        candidate = f"{name}_{file_number}"
        serial = 2
        # The suffixed name could itself be taken; keep going until it is free.
        while candidate in self.results:
            candidate = f"{name}_{file_number}_{serial}"
            serial += 1
        return candidate

    def _clean_extracted_text(self, raw_text):
        """Fold numeric lines onto their labels, then drop repeated non-blank lines."""
        # Fold first: a bare score such as "50" can legitimately occur for
        # several scales, and de-duplicating raw lines would discard every
        # occurrence after the first.
        folded = self.reformat_numeric_blocks(raw_text)

        seen = set()
        kept = []
        for line in folded.split('\n'):
            key = line.strip()
            if key:
                if key in seen:
                    continue
                seen.add(key)
            # Blank lines are always kept to preserve paragraph breaks.
            kept.append(line)
        return "\n".join(kept)

    def show_results_window(self):
        """Open a window showing every extracted result, with copy buttons.

        The window contains a "copy all" button (names plus content), a
        "copy for AI" button (content only), one copy button per result
        name laid out in rows of seven, and a read-only text area holding
        each result under a bold header line.
        """
        window = Toplevel(self.root)
        window.title("추출 결과")
        window.geometry("800x600")
        window.configure(bg=self.bg_color)

        def put_on_clipboard(payload):
            # Replace the clipboard contents and confirm to the user.
            self.root.clipboard_clear()
            self.root.clipboard_append(payload)
            messagebox.showinfo("복사 완료", "클립보드에 복사되었습니다.", parent=window)

        def copy_everything():
            # NOTE(review): the "---name---" header is followed directly by
            # the content with no newline in between — confirm this is intended.
            sections = [
                f"---{name}---{content}"
                for name, content in self.results.items()
            ]
            put_on_clipboard("\n\n".join(sections))

        def copy_one(name):
            put_on_clipboard(self.results[name])

        def copy_without_names():
            put_on_clipboard("\n\n".join(self.results.values()))

        # Row with the two buttons that act on all results at once.
        accent_style = {
            "font": self.font_main,
            "bg": self.btn_accent_color,
            "relief": "flat",
            "padx": 10,
        }
        global_bar = tk.Frame(window, pady=5, bg=self.bg_color)
        global_bar.pack(fill=tk.X, padx=10, pady=(10, 0))
        copy_all_button = tk.Button(
            global_bar, text="[ 전체 복사 ]", command=copy_everything, **accent_style
        )
        copy_all_button.pack(side=tk.LEFT)
        copy_ai_button = tk.Button(
            global_bar,
            text="[ AI에게 맡기기 (이름 제외) ]",
            command=copy_without_names,
            **accent_style,
        )
        copy_ai_button.pack(side=tk.LEFT, padx=5)

        # Grid of per-name copy buttons, seven per row.
        per_name_bar = tk.Frame(window, pady=2, bg=self.bg_color)
        per_name_bar.pack(fill=tk.X, padx=10)
        max_cols = 7
        for index, name in enumerate(self.results):
            grid_row, grid_col = divmod(index, max_cols)
            name_button = tk.Button(
                per_name_bar,
                text=str(name),
                # Bind the current name as a default to avoid late binding.
                command=lambda n=name: copy_one(n),
                font=self.font_main,
                bg=self.btn_color,
                relief="flat",
                padx=8,
                pady=2,
            )
            name_button.grid(row=grid_row, column=grid_col, padx=2, pady=3, sticky="w")

        ttk.Separator(window, orient='horizontal').pack(fill='x', padx=10, pady=10)

        # Scrollable text area with all results.
        viewer_frame = tk.Frame(window, bg=self.bg_color)
        viewer_frame.pack(expand=True, fill="both", padx=10, pady=(0, 10))
        viewer = tk.Text(
            viewer_frame,
            wrap="word",
            font=("맑은 고딕", 10),
            relief="solid",
            bd=1,
            spacing1=2,
            spacing3=2,
        )
        viewer_scroll = tk.Scrollbar(viewer_frame, command=viewer.yview, relief="flat")
        viewer.config(yscrollcommand=viewer_scroll.set)
        viewer_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        viewer.pack(expand=True, fill="both")

        for name, content in self.results.items():
            header_line = f"============== {name} ===============\n"
            viewer.insert(tk.END, header_line, "header")
            viewer.insert(tk.END, content + "\n\n")
        viewer.tag_config("header", font=("Segoe UI", 12, "bold"), spacing3=10)
        # Make the viewer read-only once it has been filled.
        viewer.config(state="disabled")

if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the application
    # to it, and hand control to the Tk event loop.
    main_window = tk.Tk()
    application = MmpiExtractorApp(main_window)
    main_window.mainloop()
