"""
네이버 플레이스 크롤러 (GraphQL API 기반)
- pcmap-api.place.naver.com/place/graphql 사용
"""

import re
import time
import requests
from datetime import datetime

# GraphQL endpoint that every query in this module is POSTed to.
GRAPHQL_URL = 'https://pcmap-api.place.naver.com/place/graphql'

# One module-wide HTTP session, so all requests share the same default
# headers (set below) and the same underlying `requests.Session` object.
SESSION = requests.Session()
# Default headers sent with every request made through SESSION.
# NOTE(review): the browser-like User-Agent and the pcmap Referer are
# presumably required for the endpoint to accept the request - confirm.
SESSION.headers.update({
    'User-Agent'     : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    'Accept'         : '*/*',
    'Accept-Language': 'ko-KR,ko;q=0.9',
    'Content-Type'   : 'application/json',
    'Referer'        : 'https://pcmap.place.naver.com/',
})


def _gql(query: str, variables: dict = None) -> dict:
    """POST a single GraphQL operation and return its ``data`` object.

    The operation is wrapped in a one-element list before sending, and the
    first element of the (list-shaped) response is unwrapped on the way back.

    Args:
        query: GraphQL query text.
        variables: Variables for the query; ``None`` is sent as ``{}``.

    Returns:
        The ``data`` value of the first response entry, or ``{}`` when the
        response is not a list, ``data`` is missing/falsy, or any exception
        occurs (the exception is printed, not raised).
    """
    payload = [{'query': query, 'variables': variables or {}}]
    try:
        response = SESSION.post(GRAPHQL_URL, json=payload, timeout=12)
        response.raise_for_status()
        parsed = response.json()
        if not isinstance(parsed, list):
            return {}
        first_data = parsed[0].get('data')
        return first_data or {}
    except Exception as e:
        # Best-effort by design: callers treat {} as "nothing collected".
        print(f'[GQL ERROR] {e}')
        return {}


def _parse_num(s) -> int:
    """'4,460' → 4460"""
    if s is None:
        return 0
    return int(str(s).replace(',', '').strip() or 0)


# ─────────────────────────────────────────────────────────────
# 1. 업체 기본정보 + 통계 수집
# ─────────────────────────────────────────────────────────────
# Place search query: for the search text `$query` it requests, per result
# item, the id, name, category, both address forms, phone, the two review
# counters and the x/y fields.
# NOTE(review): `$display` is presumably the maximum number of results - confirm.
PLACE_SUMMARY_Q = '''
query getPlaceDetail($query: String!, $display: Int) {
  places(input: {query: $query, display: $display}) {
    items {
      id name category address roadAddress phone
      visitorReviewCount blogCafeReviewCount x y
    }
  }
}'''

# Detail query for a single place id; only `base.visitorReviewsScore`
# (used by get_place_summary as the rating) is requested.
PLACE_RATING_Q = '''
query getPlaceRating($id: String!) {
  placeDetail(input: {id: $id}) {
    base { visitorReviewsScore }
  }
}'''

def _find_place_in_search(query_text: str, target_id: str, display: int):
    """Run the place search and return the result item whose id equals *target_id*, else ``None``."""
    data = _gql(PLACE_SUMMARY_Q, {'query': query_text, 'display': display})
    # `.get(key, default)` only covers a *missing* key; a JSON null for
    # 'places' or 'items' would otherwise break the chained lookup/iteration.
    items = (data.get('places') or {}).get('items') or []
    return next(
        (it for it in items
         if isinstance(it, dict) and str(it.get('id', '')) == target_id),
        None,
    )


def get_place_summary(place_id: str, hint_name: str = '') -> dict:
    """Collect basic information and review statistics for one place.

    The place is located through the search query: first by *hint_name*
    (when given), then, if that did not surface the id, by the id itself.
    The rating is fetched with a separate detail query.

    Args:
        place_id: Id of the place to look up (compared as a string).
        hint_name: Known place name, used as the first search text when
            non-empty.

    Returns:
        A dict with the keys ``place_id``, ``name``, ``category``,
        ``address`` (road address preferred), ``phone``, ``rating`` (float,
        ``0.0`` when unavailable), ``visitor_review_count``,
        ``blog_review_count`` and ``collected_at`` (local time,
        ``YYYY-MM-DD HH:MM:SS``); or ``{}`` when the id is not found in the
        search results.
    """
    target_id = str(place_id)

    matched = _find_place_in_search(hint_name or target_id, target_id, 20)
    if not matched and hint_name:
        # The name search missed the id; retry searching by the id itself.
        matched = _find_place_in_search(target_id, target_id, 5)

    if not matched:
        print(f'[WARN] place_id {place_id} 검색 결과에서 찾지 못함')
        return {}

    # Rating lives at placeDetail.base.visitorReviewsScore; any level may be null.
    rating_data = _gql(PLACE_RATING_Q, {'id': target_id})
    base = (rating_data.get('placeDetail') or {}).get('base') or {}
    try:
        rating = float(base.get('visitorReviewsScore') or 0.0)
    except (TypeError, ValueError):
        rating = 0.0

    return {
        'place_id'             : place_id,
        'name'                 : matched.get('name') or '',
        'category'             : matched.get('category') or '',
        'address'              : matched.get('roadAddress') or matched.get('address') or '',
        'phone'                : matched.get('phone') or '',
        'rating'               : rating,
        'visitor_review_count' : _parse_num(matched.get('visitorReviewCount', 0)),
        'blog_review_count'    : _parse_num(matched.get('blogCafeReviewCount', 0)),
        'collected_at'         : datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }


# ─────────────────────────────────────────────────────────────
# 2. 리뷰 수집
# ─────────────────────────────────────────────────────────────
# Visitor-review query for one business id. The sort order is fixed to
# "RECENT" inside the query text; page and size are passed as variables.
# Per review it requests id, author nickname, rating, body, visitCount and created.
REVIEW_Q = '''
query getReviews($id: String!, $page: Int, $size: Int) {
  visitorReviews(input: {businessId: $id, page: $page, size: $size, sort: "RECENT"}) {
    total
    items {
      id
      author  { nickname }
      rating
      body
      visitCount
      created
    }
  }
}'''

def get_place_reviews(place_id: str, page: int = 1, size: int = 50) -> list:
    """Fetch one page of visitor reviews for a place.

    Args:
        place_id: Id of the place (sent as a string).
        page: Page number passed to the query.
        size: Page size passed to the query.

    Returns:
        A list of dicts with the keys ``review_id``, ``author``, ``rating``,
        ``content``, ``visit_count``, ``created_at`` and ``collected_at``.
        Empty when the request fails or no reviews are returned. All reviews
        of one call share the same ``collected_at`` timestamp.
    """
    data = _gql(REVIEW_Q, {'id': str(place_id), 'page': page, 'size': size})
    # `.get(key, default)` does not cover an explicit JSON null, so fall back with `or`.
    items = (data.get('visitorReviews') or {}).get('items') or []
    collected_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return [
        {
            'review_id'  : it.get('id', ''),
            # `author` may be null; treat it like a missing author.
            'author'     : (it.get('author') or {}).get('nickname', ''),
            'rating'     : it.get('rating'),
            'content'    : it.get('body', ''),
            'visit_count': it.get('visitCount', 0),
            'created_at' : it.get('created', ''),
            'collected_at': collected_at,
        }
        for it in items
        if isinstance(it, dict)
    ]


# ─────────────────────────────────────────────────────────────
# 3. 키워드 검색 순위
# ─────────────────────────────────────────────────────────────
# Keyword search query used for ranking: requests the total plus, per result
# item, only id, name, category and visitorReviewCount.
# NOTE(review): `$display` is presumably the maximum number of results - confirm.
RANK_Q = '''
query getPlaceRank($query: String!, $display: Int) {
  places(input: {query: $query, display: $display}) {
    total
    items {
      id name category visitorReviewCount
    }
  }
}'''

def get_place_rank(keyword: str, my_place_id: str, max_rank: int = 30) -> dict:
    """Search *keyword* and report where *my_place_id* appears in the results.

    Ranks are the 1-based positions of the items in the returned result list.

    Args:
        keyword: Search text.
        my_place_id: Id of the place to locate (compared as a string).
        max_rank: Value passed as the query's ``display`` variable.

    Returns:
        A dict with ``keyword``, ``my_rank`` (``None`` when the place is not
        among the returned items), ``total_found`` (number of returned
        items), ``competitors`` (one dict per item with ``rank``,
        ``place_id``, ``name``, ``category``, ``review_count``,
        ``is_my_place``) and ``searched_at`` (local time,
        ``YYYY-MM-DD HH:MM:SS``).
    """
    data = _gql(RANK_Q, {'query': keyword, 'display': max_rank})
    # `.get(key, default)` does not cover an explicit JSON null, so fall back with `or`.
    raw_items = (data.get('places') or {}).get('items') or []
    items = [it for it in raw_items if isinstance(it, dict)]
    target_id = str(my_place_id)

    rank = None
    competitors = []
    for position, it in enumerate(items, 1):
        pid = str(it.get('id', ''))
        is_mine = pid == target_id
        competitors.append({
            'rank'        : position,
            'place_id'    : pid,
            'name'        : it.get('name', ''),
            'category'    : it.get('category', ''),
            'review_count': _parse_num(it.get('visitorReviewCount', 0)),
            'is_my_place' : is_mine,
        })
        if is_mine:
            rank = position

    return {
        'keyword'    : keyword,
        'my_rank'    : rank,
        'total_found': len(items),
        'competitors': competitors,
        'searched_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }


# ─────────────────────────────────────────────────────────────
# 4. 리뷰 키워드 빈도 분석
# ─────────────────────────────────────────────────────────────
# Words excluded from the keyword frequency count (particles, endings,
# intensifiers and other filler). analyze_review_keywords checks both the raw
# token and its normalized stem against this set.
STOPWORDS = {
    '이','가','을','를','은','는','에','의','도','로','으로','와','과','이나','나',
    '에서','까지','부터','한','하고','하는','있는','없는','있어요','없어요','이에요',
    '예요','이고','있고','정말','너무','진짜','완전','되게','매우','아주','그냥',
    '좀','또','다시','다음','번','곳','것','수','때','더','잘','같아요','같은',
    '이었','했어','갔어','왔어',  # meaningless stems left over after normalization
}

# 한국어 어미/조사 (긴 것부터 - greedy 매칭)
_KR_SUFFIXES = sorted([
    '이에요', '이고요', '이라고', '이라서', '이었어', '이에서',
    '어요', '아요', '이에', '이고', '이며', '이나', '이라', '이다',
    '에서', '까지', '부터', '이랑', '처럼', '만큼', '한테', '에게', '으로',
    '습니다', '니다', '어서', '아서', '네요', '군요',
    '을', '를', '이', '가', '은', '는', '도', '로', '에', '의', '와', '과', '나', '랑',
    '게', '고', '서',
], key=len, reverse=True)

def _normalize_kr(word: str) -> str:
    """조사/어미 제거 → 어근 반환 (어근이 2자 미만이면 원형 유지)"""
    for suffix in _KR_SUFFIXES:
        if word.endswith(suffix) and len(word) - len(suffix) >= 2:
            return word[:-len(suffix)]
    return word


def analyze_review_keywords(reviews: list, top_n: int = 20) -> list:
    """Count how often each normalized Korean word occurs across reviews.

    Tokens are runs of two or more Hangul syllables taken from each review's
    ``content``. A token is skipped when it, or its normalized stem, is a
    stopword, or when the stem is shorter than two characters.

    Args:
        reviews: Review dicts; a missing or falsy ``content`` counts as empty.
        top_n: Maximum number of entries to return.

    Returns:
        Up to *top_n* ``{'keyword': ..., 'count': ...}`` dicts, most frequent
        first; equal counts keep first-seen order.
    """
    freq = {}
    for entry in reviews:
        text = entry.get('content', '') or ''
        tokens = re.findall(r'[가-힣]{2,}', text)
        for token in tokens:
            if token in STOPWORDS:
                continue
            root = _normalize_kr(token)
            if len(root) >= 2 and root not in STOPWORDS:
                freq[root] = freq.get(root, 0) + 1
    # Ascending sort on the negated count is stable, so ties stay in first-seen order.
    ranked = sorted(freq.items(), key=lambda pair: -pair[1])
    top = ranked[:top_n]
    return [{'keyword': term, 'count': hits} for term, hits in top]


# ─────────────────────────────────────────────────────────────
# 출력 헬퍼 (테스트용)
# ─────────────────────────────────────────────────────────────
def print_summary(d: dict):
    """Print a place summary dict as a framed block (test/debug helper).

    Reads ``name``, ``category``, ``address``, ``phone``, ``rating``,
    ``visitor_review_count`` and ``collected_at`` from *d*; missing keys
    are printed as ``None``.
    """
    border = '=' * 50
    # (label prefix, key in d, text appended after the value)
    rows = (
        ("  업체명    : ", 'name', ""),
        ("  카테고리  : ", 'category', ""),
        ("  주소      : ", 'address', ""),
        ("  전화      : ", 'phone', ""),
        ("  별점(평균): ", 'rating', " ⭐"),
        ("  방문자리뷰: ", 'visitor_review_count', "개"),
        ("  수집시간  : ", 'collected_at', ""),
    )
    print('\n' + border)
    for label, key, tail in rows:
        print(f"{label}{d.get(key)}{tail}")
    print(border)

def print_rank(r: dict):
    """Print a rank result dict: keyword, own rank and the first five competitors (test/debug helper).

    A falsy ``my_rank`` is shown as out of ranking. Each competitor entry must
    provide ``rank``, ``name``, ``review_count`` and ``is_my_place``.
    """
    print(f"\n검색어: [{r.get('keyword')}]")
    my_rank = r.get('my_rank')
    if my_rank:
        position = '{}위'.format(my_rank)
    else:
        position = '순위권 밖'
    print(f"  내 업체: {position} / 전체 {r.get('total_found')}개")
    print("  상위 5개:")
    top_five = r.get('competitors', [])[:5]
    for entry in top_five:
        marker = ''
        if entry['is_my_place']:
            marker = ' ← 내 업체'
        print(f"    {entry['rank']}위 {entry['name']} | 리뷰 {entry['review_count']}개{marker}")


# ─────────────────────────────────────────────────────────────
# 실행 예시
# ─────────────────────────────────────────────────────────────
if __name__ == '__main__':
    # Demo run: summary, newest reviews, keyword frequencies, then keyword ranks.
    MY_PLACE_ID   = '11830966'
    MY_PLACE_NAME = '소들녘 광명점'
    KEYWORDS      = ['광명 소고기', '광명 갈비']

    # 1) Basic info
    print('📍 기본정보 수집...')
    summary = get_place_summary(MY_PLACE_ID, hint_name=MY_PLACE_NAME)
    if not summary:
        print('수집 실패')
    else:
        print_summary(summary)

    # 2) Newest reviews: fetch five, show the first three with a 50-char preview
    print('\n📝 최신 리뷰 수집...')
    reviews = get_place_reviews(MY_PLACE_ID, page=1, size=5)
    for review in reviews[:3]:
        preview = (review['content'] or '')[:50]
        print(f"  {review['author']} | {preview}...")

    # 3) Keyword frequencies over one page of 50 reviews
    print('\n🔑 키워드 분석...')
    all_reviews = get_place_reviews(MY_PLACE_ID, page=1, size=50)
    kws = analyze_review_keywords(all_reviews, top_n=10)
    for item in kws:
        print(f"  {item['keyword']}: {item['count']}회")

    # 4) Rank per keyword, pausing one second between searches
    print('\n📊 순위 확인...')
    for keyword in KEYWORDS:
        rank_data = get_place_rank(keyword, MY_PLACE_ID, max_rank=30)
        print_rank(rank_data)
        time.sleep(1)
