import sys,os,re
sys.path.append(re.sub('blues_lib.*','blues_lib',os.path.realpath(__file__)))
from type.output.STDOut import STDOut
from type.model.Model import Model
from crawler.Crawler import Crawler
from namespace.CrawlerName import CrawlerName

class LoopCrawler(Crawler):
  """Crawler that visits every url of the summary config in sequence.

  Each url is crawled with its own derived Model through the parent class's
  `_crawl`; the data of every successful crawl is collected into one flat
  list of rows.
  """

  NAME = CrawlerName.Engine.LOOP

  def _run(self)->STDOut:
    """Crawl all configured urls and merge their rows.

    Returns:
      STDOut(200,'ok',rows) when at least one row was collected,
      otherwise STDOut(500,'Failed to crawl any data').
    """
    summary_conf = self._model.config[self._SUMMARY.value]
    urls:list[str] = summary_conf[self._URLS.value]
    rows:list[dict] = []
    # hoisted: the index of the last url does not change during the loop
    last_idx = len(urls) - 1
    for idx,url in enumerate(urls):
      model:Model = self._get_model(url,idx == last_idx)
      stdout = super()._crawl(model)
      self._append(stdout,rows)

    if not rows:
      return STDOut(500,'Failed to crawl any data')
    return STDOut(200,'ok',rows)

  def _setup(self):
    """Validate that everything needed by `_run` is present.

    Raises:
      Exception: if the model or the browser is missing, or if the summary
        section or its url list is absent or empty.
    """
    message = f'[{self.__class__.__name__}] Failed to crawl any data'
    if not self._model or not self._browser:
      raise Exception(f'{message} - model or browser is missing')

    try:
      summary = self._model.config[self._SUMMARY.value]
    except KeyError:
      # an absent summary section is reported exactly like an empty one,
      # instead of leaking a bare KeyError without any context
      summary = None
    if not summary:
      raise Exception(f'{message} - {self._SUMMARY} is missing')

    urls:list[str] = summary.get(self._URLS.value)
    if not urls:
      raise Exception(f'{message} - {self._URLS} is missing')

  def _append(self,stdout:STDOut,rows:list[dict]):
    """Add the data of a successful crawl result to `rows` in place.

    Results whose code is not 200, or whose data is empty, are ignored.
    List data is flattened into `rows`; any other data is appended as one row.
    """
    if stdout.code != 200:
      return

    if row := stdout.data:
      if isinstance(row, list):
        rows.extend(row)
      else:
        rows.append(row)

  def _get_model(self,url:str,is_last:bool)->Model:
    """Build the Model used to crawl a single url.

    Args:
      url: the url to crawl; stored under the 'url' key of the bizdata.
      is_last: whether this is the final url of the loop.

    Returns:
      A new Model built from shallow copies of the body meta and the bizdata.
    """
    ori_meta = self._model.meta[self._BODY.value]
    bizdata = {
      **self._model.bizdata,
      'url':url, # crawl the next url
    }

    # shallow copy, so that the original meta keeps its teardown section
    meta = {**ori_meta}

    # only the last url keeps the teardown nodes, so that the browser is not
    # quit before all urls are crawled
    if not is_last:
      # pop with a default: a meta without a teardown section must not fail
      meta.pop(self._TEARDOWN.value,None)

    return Model(meta,bizdata)