diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7ce3c17..656e62e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -89,8 +89,6 @@ jobs: python-version: "3.11" - os: macos-latest python-version: "3.11" - - os: windows-latest - python-version: "3.11" - os: ubuntu-latest python-version: "3.12" @@ -116,6 +114,9 @@ jobs: pip install -e .[dev] - name: Run tests with pytest + env: + KRX_ID: ${{ secrets.KRX_ID }} + KRX_PW: ${{ secrets.KRX_PW }} run: | pytest -v --cov=pykrx --cov-report=xml --cov-report=term @@ -147,6 +148,9 @@ jobs: pip install -e .[dev] - name: Run tests with pytest + env: + KRX_ID: ${{ secrets.KRX_ID }} + KRX_PW: ${{ secrets.KRX_PW }} run: | pytest -v --cov=pykrx --cov-report=xml --cov-report=term diff --git a/pykrx/stock/future_api.py b/pykrx/stock/future_api.py index a499aaa..fefc361 100644 --- a/pykrx/stock/future_api.py +++ b/pykrx/stock/future_api.py @@ -102,8 +102,8 @@ def get_future_ohlcv_by_ticker( # tickers = get_future_ticker_list() # print(tickers) - # names = get_future_ticker_name('KRDRVFUEST') - # print(names) + names = get_future_ticker_name("KRDRVFUEST") + print(names) df = get_future_ohlcv("20220902", "KRDRVFUEST") print(df) diff --git a/pykrx/website/comm/__init__.py b/pykrx/website/comm/__init__.py index 328ef12..765001b 100644 --- a/pykrx/website/comm/__init__.py +++ b/pykrx/website/comm/__init__.py @@ -1,3 +1,19 @@ +from pykrx.website.comm.auth import ( + build_krx_session, + get_auth_session, + login_krx, + warmup_krx_session, +) from pykrx.website.comm.util import dataframe_empty_handler, singleton +from pykrx.website.comm.webio import get_session, set_session -__all__ = ["dataframe_empty_handler", "singleton"] +__all__ = [ + "dataframe_empty_handler", + "singleton", + "get_auth_session", + "get_session", + "set_session", + "build_krx_session", + "login_krx", + "warmup_krx_session", +] diff --git a/pykrx/website/comm/auth.py b/pykrx/website/comm/auth.py new file mode 100644 index 
0000000..25ade5b
--- /dev/null
+++ b/pykrx/website/comm/auth.py
@@ -0,0 +1,264 @@
+import logging
+import os
+import time
+from dataclasses import dataclass, field
+from typing import Optional
+
+import requests
+
+LOGIN_PAGE = "https://data.krx.co.kr/contents/MDC/COMS/client/MDCCOMS001.cmd"
+LOGIN_JSP = "https://data.krx.co.kr/contents/MDC/COMS/client/view/login.jsp?site=mdc"
+LOGIN_URL = "https://data.krx.co.kr/contents/MDC/COMS/client/MDCCOMS001D1.cmd"
+USER_AGENT = (
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+)
+# Locally assumed session lifetime (seconds) used to schedule re-login.
+SESSION_LIFETIME_SECONDS = 3600
+
+logger = logging.getLogger(__name__)
+
+# Global session reference (set by webio.py)
+_auth_session: Optional["KRXSession"] = None
+
+
+@dataclass
+class KRXSession:
+    """Authenticated KRX session wrapper.
+
+    Holds the ``requests.Session`` used for login, a snapshot of its cookies,
+    and a locally tracked expiry time used to decide when to log in again.
+    """
+
+    session: requests.Session = field(default_factory=requests.Session)
+    login_time: float = field(default_factory=time.time)
+    expiry_time: float = field(
+        default_factory=lambda: time.time() + SESSION_LIFETIME_SECONDS
+    )
+    is_authenticated: bool = False
+    cookies: dict = field(default_factory=dict)
+
+    def is_valid(self, buffer_seconds: int = 300) -> bool:
+        """Return True while logged in and more than *buffer_seconds* from expiry."""
+        return self.is_authenticated and time.time() < (
+            self.expiry_time - buffer_seconds
+        )
+
+    def refresh(self, login_id: str, login_pw: str) -> bool:
+        """Log in again on a fresh ``requests.Session``.
+
+        Returns True on success. Network errors and non-JSON login responses
+        are logged and reported as False rather than raised, because webio.py
+        reaches this code (via build_krx_session) while it is being imported.
+        """
+        try:
+            self.session.close()
+        except Exception as e:
+            logger.warning("Session close error: %s", e)
+
+        self.session = requests.Session()
+        # Reset state from the previous login so a failed refresh cannot keep
+        # reporting itself as authenticated or resend stale cookies.
+        self.is_authenticated = False
+        self.cookies = {}
+
+        try:
+            # login_krx() performs the warm-up GETs itself.
+            success = login_krx(login_id, login_pw, self.session)
+        except (requests.RequestException, ValueError) as e:
+            logger.warning("KRX login request failed: %s", e)
+            return False
+
+        if success:
+            self.login_time = time.time()
+            self.expiry_time = self.login_time + SESSION_LIFETIME_SECONDS
+            self.is_authenticated = True
+            # Snapshot the cookie jar for get_headers().
+            self.cookies = {
+                cookie.name: {
+                    "value": cookie.value,
+                    "domain": cookie.domain,
+                    "path": cookie.path,
+                    "secure": cookie.secure,
+                    "expires": cookie.expires or 0,
+                }
+                for cookie in self.session.cookies
+            }
+
+        return success
+
+    def get_headers(self) -> dict:
+        """Return the default request headers for this session.
+
+        ``Cookie`` is included only when cookies were captured at login, so an
+        empty ``Cookie`` header is never sent.
+        """
+        headers = {
+            "User-Agent": USER_AGENT,
+            "Referer": "https://data.krx.co.kr/contents/MDC/MDI/outerLoader/index.cmd",
+            "X-Requested-With": "XMLHttpRequest",
+        }
+        if self.cookies:
+            headers["Cookie"] = "; ".join(
+                f"{name}={info['value']}" for name, info in self.cookies.items()
+            )
+        return headers
+
+    def _merged_headers(self, headers: Optional[dict]) -> dict:
+        """Overlay caller-supplied *headers* on the session defaults."""
+        merged = self.get_headers()
+        if headers:
+            merged.update(headers)
+        return merged
+
+    def get(
+        self,
+        url: str,
+        headers: Optional[dict] = None,
+        params: Optional[dict] = None,
+        **kwargs,
+    ):
+        """Send a GET request through the underlying session."""
+        return self.session.get(
+            url, headers=self._merged_headers(headers), params=params, **kwargs
+        )
+
+    def post(
+        self,
+        url: str,
+        headers: Optional[dict] = None,
+        data: Optional[dict] = None,
+        **kwargs,
+    ):
+        """Send a POST request through the underlying session."""
+        return self.session.post(
+            url, headers=self._merged_headers(headers), data=data, **kwargs
+        )
+
+
+def set_auth_session(session: KRXSession | None) -> None:
+    """Set the global auth session (called by webio.py)."""
+    global _auth_session
+    _auth_session = session
+
+
+def warmup_krx_session(session: requests.Session) -> None:
+    """GET the login page and the login iframe on *session* before logging in."""
+    session.get(LOGIN_PAGE, headers={"User-Agent": USER_AGENT}, timeout=15)
+    session.get(
+        LOGIN_JSP,
+        headers={"User-Agent": USER_AGENT, "Referer": LOGIN_PAGE},
+        timeout=15,
+    )
+
+
+def login_krx(
+    login_id: str, login_pw: str, session: requests.Session | None = None
+) -> bool:
+    """Log in to KRX on *session* (a new one is created when omitted).
+
+    Login flow:
+      1. GET MDCCOMS001.cmd -> initial JSESSIONID is issued
+      2. GET login.jsp -> iframe session initialisation
+      3. POST MDCCOMS001D1.cmd -> actual login
+      4. CD011 (duplicate login) -> resend with skipDup=Y
+
+    Returns True only when the final response reports error code CD001.
+    ``requests.RequestException`` and ``ValueError`` (non-JSON body) propagate.
+    """
+    if session is None:
+        session = requests.Session()
+
+    warmup_krx_session(session)
+
+    payload = {
+        "mbrNm": "",
+        "telNo": "",
+        "di": "",
+        "certType": "",
+        "mbrId": login_id,
+        "pw": login_pw,
+    }
+    headers = {"User-Agent": USER_AGENT, "Referer": LOGIN_PAGE}
+
+    resp = session.post(LOGIN_URL, data=payload, headers=headers, timeout=15)
+    data = resp.json()
+    error_code = data.get("_error_code", "")
+
+    # CD010: password change required
+    if error_code == "CD010":
+        logger.warning(
+            "KRX 비밀번호 변경이 필요합니다. 오류 메시지: %s "
+            "(https://www.krx.co.kr 에서 비밀번호를 변경한 후 다시 시도하세요.)",
+            data.get("_error_message", ""),
+        )
+        return False
+
+    # CD011: duplicate login -> retry once with skipDup=Y
+    if error_code == "CD011":
+        payload["skipDup"] = "Y"
+        resp = session.post(LOGIN_URL, data=payload, headers=headers, timeout=15)
+        error_code = resp.json().get("_error_code", "")
+
+    return error_code == "CD001"  # CD001 = success
+
+
+def build_krx_session(
+    login_id: Optional[str] = None, login_pw: Optional[str] = None
+) -> KRXSession | None:
+    """Create a logged-in KRXSession, or return None when login is not possible.
+
+    Credentials default to the KRX_ID / KRX_PW environment variables, read at
+    call time rather than at import time so values exported later are honoured.
+    """
+    if login_id is None:
+        login_id = os.getenv("KRX_ID")
+    if login_pw is None:
+        login_pw = os.getenv("KRX_PW")
+
+    if not (login_id and login_pw):
+        # webio.py reaches this on every import when no credentials are set,
+        # so keep it below WARNING. The login ID itself is never logged.
+        logger.info("KRX 로그인 실패: KRX_ID 또는 KRX_PW 환경 변수가 설정되지 않았습니다.")
+        return None
+
+    logger.info("KRX 로그인 시도...")
+    krxs = KRXSession()
+    if not krxs.refresh(login_id, login_pw):
+        logger.warning("KRX 로그인 실패: 자격 증명을 확인하세요.")
+        return None
+
+    stamp = "%Y-%m-%d %H:%M:%S"
+    logger.info(
+        "KRX 로그인 완료. 로그인 시간: %s, 만료 시간: %s",
+        time.strftime(stamp, time.localtime(krxs.login_time)),
+        time.strftime(stamp, time.localtime(krxs.expiry_time)),
+    )
+    return krxs
+
+
+def get_auth_session() -> KRXSession | None:
+    """Return the active KRX session, logging in or refreshing it when needed.
+
+    Returns None when KRX_ID / KRX_PW are not set or when the (re)login fails.
+    """
+    global _auth_session
+
+    if _auth_session is not None and _auth_session.is_valid():
+        return _auth_session
+
+    login_id = os.getenv("KRX_ID")
+    login_pw = os.getenv("KRX_PW")
+    if not (login_id and login_pw):
+        return None
+
+    if _auth_session is None:
+        _auth_session = build_krx_session(login_id, login_pw)
+        return _auth_session
+
+    logger.info("KRX 세션 만료, 재로그인 시도...")
+    if _auth_session.refresh(login_id, login_pw):
+        logger.info("KRX 세션 갱신 완료.")
+        return _auth_session
+    logger.warning("KRX 세션 갱신 실패.")
+    return None
diff --git a/pykrx/website/comm/util.py b/pykrx/website/comm/util.py
index 6e1b8cf..8f9b56e 100644
--- a/pykrx/website/comm/util.py
+++ b/pykrx/website/comm/util.py
@@ -1,3 +1,4 @@
+import json
 import logging
 
 from pandas import DataFrame
@@ -7,7 +8,14 @@ def dataframe_empty_handler(func):
     def wrapper(*args, **kwargs):
         try:
             return func(*args, **kwargs)
-        except (AttributeError, KeyError, TypeError, ValueError) as e:
+        except (
+            AttributeError,
+            KeyError,
+            TypeError,
+            ValueError,
+            json.JSONDecodeError,
+        ) as e:
+            logging.warning("Error occurred in %s: %s", func.__name__, e)
             logging.info(args, kwargs)
             logging.info(e)
             return DataFrame()
diff --git a/pykrx/website/comm/webio.py b/pykrx/website/comm/webio.py
index 09281df..c073a3a 100644
--- a/pykrx/website/comm/webio.py
+++ b/pykrx/website/comm/webio.py
@@ -2,16 +2,54 @@
 
 import requests
 
+from pykrx.website.comm.auth import (
+    KRXSession,
+    build_krx_session,
+    get_auth_session,
+    set_auth_session,
+)
+
+# Initialize session at module load time
+_session = build_krx_session()
+# Set the auth session for get_auth_session() to work
+set_auth_session(_session)
+
+
+def set_session(session) -> None:
+    """Set the global session (deprecated, use KRXSession)"""
+    global _session
+    _session = session
+
+
+def get_session() -> KRXSession | None:
+    """Get the current KRX session with automatic refresh if expired."""
+    return get_auth_session()
+
 
 class Get:
     def __init__(self):
         self.headers = {
             "User-Agent": "Mozilla/5.0",
             "Referer": "https://data.krx.co.kr/contents/MDC/MDI/outerLoader/index.cmd",
+            "X-Requested-With": "XMLHttpRequest",
         }
 
     def read(self, **params):
-        resp = requests.get(self.url, headers=self.headers, params=params)
+        krxs = get_session()
+
+        if krxs is None:
+            # No authenticated session: one-shot request. requests.get()
+            # closes its temporary Session, so no connection pool is leaked.
+            resp = requests.get(self.url, headers=self.headers, params=params)
+        else:
+            # Use the KRXSession headers (cookies included) as the base,
+            headers = krxs.get_headers()
+            # then let this request's own headers take precedence.
+            # NOTE: this replaces the session's browser-like User-Agent.
+            headers.update(self.headers)
+
+            resp = krxs.session.get(self.url, headers=headers, params=params)
+
         return resp
 
     @property
@@ -25,12 +63,27 @@ def __init__(self, headers=None):
         self.headers = {
             "User-Agent": "Mozilla/5.0",
             "Referer": "https://data.krx.co.kr/contents/MDC/MDI/outerLoader/index.cmd",
+            "X-Requested-With": "XMLHttpRequest",
         }
         if headers is not None:
             self.headers.update(headers)
 
     def read(self, **params):
-        resp = requests.post(self.url, headers=self.headers, data=params)
+        krxs = get_session()
+
+        if krxs is None:
+            # No authenticated session: one-shot request. requests.post()
+            # closes its temporary Session, so no connection pool is leaked.
+            resp = requests.post(self.url, headers=self.headers, data=params)
+        else:
+            # Use the KRXSession headers (cookies included) as the base,
+            headers = krxs.get_headers()
+            # then let this request's own headers take precedence.
+            # NOTE: this replaces the session's browser-like User-Agent.
+            headers.update(self.headers)
+
+            resp = krxs.session.post(self.url, headers=headers, data=params)
+
         return resp
 
     @property
diff --git a/pykrx/website/krx/items/wrap.py b/pykrx/website/krx/items/wrap.py
index ff9af3d..755c399 100644
--- a/pykrx/website/krx/items/wrap.py
+++ b/pykrx/website/krx/items/wrap.py
@@ -51,6 +51,7 @@ def get_item_gold_ticker(date: str) -> str:
         strtDd="20251107",
         endDd="20251125",
     )
+    print(df_gold_price)
     # get_item_gold_price("KRD040200002", "20251107", "20251125")
     # TRD_DD TDD_CLSPRC FLUC_TP_CD CMPPREVDD_PRC FLUC_RT TDD_OPNPRC TDD_HGPRC TDD_LWPRC ACC_TRDVOL ACC_TRDVAL
     # 0 2025/11/25 197,300 1 3,850 1.99
196,960 197,500 196,290 574,628 112,714,602,040
diff --git a/tests/conftest.py b/tests/conftest.py
index 86c9267..5b1dd7c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,7 +4,10 @@
 from pathlib import Path
 
 import pytest
-import vcr
+import vcr as vcrpy
+import yaml
+
+from pykrx.website.comm import webio as _webio
 
 IGNORED_DATE_KEYS = {
     "strtDd",
@@ -126,11 +129,57 @@ def before_record_request(request):
 
 # Register custom matchers globally for ALL VCR instances
 # This needs to be done at module level before pytest-vcr creates VCR instances
-_global_vcr = vcr.VCR()
+_global_vcr = vcrpy.VCR()
 _global_vcr.register_matcher("uri_ignore_dates", uri_without_dates)
 _global_vcr.register_matcher("body_ignore_dates", form_body_matcher)
 
 
+@pytest.fixture(scope="session", autouse=True)
+def init_singletons(tmp_path_factory):
+    """Pre-initialise the singletons from the common cassettes at session start.
+
+    VCR only uses the innermost cassette when cassettes are nested, so all
+    common cassettes are merged into one YAML file and used in a single context.
+    Once the singletons exist, individual tests need no HTTP requests for them.
+    """
+    # Merge all common cassette interactions into one file
+    all_interactions = []
+    for fname in ["etx_ticker_init.yaml", "finder_init.yaml", "index_kind_init.yaml"]:
+        path = Path(COMMON_CASSETTE_DIR) / fname
+        if path.exists():
+            data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+            all_interactions.extend(data.get("interactions", []))
+
+    combined_path = tmp_path_factory.mktemp("vcr") / "combined_init.yaml"
+    combined_path.write_text(
+        yaml.dump({"interactions": all_interactions, "version": 1}, allow_unicode=True),
+        encoding="utf-8",
+    )
+
+    _vcr = vcrpy.VCR()
+    _vcr.register_matcher("uri_ignore_dates", uri_without_dates)
+    _vcr.register_matcher("body_ignore_dates", form_body_matcher)
+
+    # Intended to force plain requests (cassettes were recorded without auth).
+    # NOTE(review): get_session() ignores the value set_session() stores - confirm.
+    original_session = _webio.get_session()
+    _webio.set_session(None)
+    try:
+        with _vcr.use_cassette(
+            str(combined_path),
+            record_mode="none",
+            allow_playback_repeats=True,
+            match_on=["uri_ignore_dates", "method", "body_ignore_dates"],
+        ):
+            from pykrx.website.krx.etx.ticker import EtxTicker
+            from pykrx.website.krx.market.ticker import StockTicker
+
+            EtxTicker()
+            StockTicker()
+    finally:
+        _webio.set_session(original_session)
+
+
 @pytest.fixture(scope="module")
 def vcr_cassette_dir():
     """pytest-vcr: cassette directory location"""