o
    5cUr                     @  s  d dl mZ d dlZd dlmZmZ d dlZd dlZd dl	m
  mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZ d d
lmZ d dlm Z  d dl!m"  m#Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4m5Z5 erd dl6m7Z7 d dl8m9Z9 G dd dZ:d;ddZ;d;d<ddZ<d;d=ddZ=d>d"d#Z>d?d@d)d*Z?dAd+d,Z@dBd/d0ZA	%d?dCd3d4ZBdDd9d:ZCdS )E    )annotationsN)TYPE_CHECKINGcast)npt)PerformanceWarning)cache_readonly)find_stack_level)maybe_promote)ensure_platform_intis_1d_only_ea_dtypeis_extension_array_dtype
is_integerneeds_i8_conversion)ExtensionDtype)notna)factorize_from_iterable)ensure_wrapped_if_datetimelike)	DataFrame)Index
MultiIndex)Series)compress_group_indexdecons_obs_group_idsget_compressed_idsget_group_indexget_group_index_sorter)ExtensionArray)
FrozenListc                   @  s   e Zd ZdZd)d*dd	Zed+ddZed,ddZd-ddZdd Z	ed.ddZ
ed/ddZd0ddZd1dd Zd2d#d$Zed3d%d&Zed4d'd(ZdS )5
_Unstackera	  
    Helper class to unstack data / pivot with multi-level index

    Parameters
    ----------
    index : MultiIndex
    level : int or str, default last level
        Level to "unstack". Accepts a name for the level.
    fill_value : scalar, optional
        Default value to fill in missing values if subgroups do not have the
        same set of labels. By default, missing values will be replaced with
        the default fill value for that data type, NaN for float, NaT for
        datetimelike, etc. For integer types, by default data will converted to
        float and missing values will be set to NaN.
    constructor : object
        Pandas ``DataFrame`` or subclass used to create unstacked
        response.  If None, DataFrame will be used.

    Examples
    --------
    >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
    ...                                    ('two', 'a'), ('two', 'b')])
    >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)
    >>> s
    one  a    1
         b    2
    two  a    3
         b    4
    dtype: int64

    >>> s.unstack(level=-1)
         a  b
    one  1  2
    two  3  4

    >>> s.unstack(level=0)
       one  two
    a    1    3
    b    2    4

    Returns
    -------
    unstacked : DataFrame
    Nindexr   returnNonec                 C  s   |d u rt }|| _| | _| j|| _d| jj| j v r dnd| _t| jj	| _
t| jj| _| j| j| _| j
| j| _|j	| j | _tdd | j
D }| jj}|| }|ttjjkrqtjd| dtt d |   d S )	Nr      r   c                 S  s   g | ]}|j qS  )size).0index_levelr$   r$   Q/var/www/html/gps/gps/lib/python3.10/site-packages/pandas/core/reshape/reshape.py
<listcomp>z   s    z'_Unstacker.__init__.<locals>.<listcomp>z%The following operation may generate z& cells in the resulting pandas object.)
stacklevel)r   constructorremove_unused_levelsr    _get_level_numberlevelcodesliftlistlevelsnew_index_levelsnamesnew_index_namespopremoved_nameremoved_levelremoved_level_fullnpmaxr%   iinfoint32warningswarnr   r   _make_selectors)selfr    r.   r+   num_rowsnum_columns	num_cellsr$   r$   r(   __init__a   s*   

z_Unstacker.__init__-tuple[npt.NDArray[np.intp], list[np.ndarray]]c           
      C  s   | j }t| jj}t| jj}|d | ||d d   || g }tdd |d | ||d d   || g D }t||\}}t|}t||}	|	|fS )Nr#   c                 s      | ]}t |V  qd S Nlenr&   xr$   r$   r(   	<genexpr>       z2_Unstacker._indexer_and_to_sort.<locals>.<genexpr>)	r.   r1   r    r/   r2   tupler   rJ   r   )
rA   vr/   levsto_sortsizes
comp_indexobs_idsngroupsindexerr$   r$   r(   _indexer_and_to_sort   s   &4
z_Unstacker._indexer_and_to_sortlist[np.ndarray]c                   s   | j \ } fdd|D S )Nc                      g | ]}|  qS r$   take)r&   linerW   r$   r(   r)          z,_Unstacker.sorted_labels.<locals>.<listcomp>)rX   )rA   rR   r$   r^   r(   sorted_labels   s   
z_Unstacker.sorted_labelsvalues
np.ndarrayc                 C  s   | j \}}tj||dd}|S )Nr   )axis)rX   algostake_nd)rA   ra   rW   _sorted_valuesr$   r$   r(   _make_sorted_values   s   
z_Unstacker._make_sorted_valuesc           
      C  s   | j }| jd d }tdd |D }t||\}}t|}t|}| jj| j | j	 }||f| _
| jd ||  | j	 }tjt| j
td}	|	|d |	 t| jk r[td|| _|	| _|t|| _d S )Nr   c                 s  rG   rH   rI   rK   r$   r$   r(   rM      rN   z-_Unstacker._make_selectors.<locals>.<genexpr>dtypeTz0Index contains duplicate entries, cannot reshape)r3   r`   rO   r   rJ   r
   r    levshaper.   r0   
full_shaper:   zerosprodboolputsum
ValueErrorgroup_indexmasksearchsortedarange
compressor)
rA   
new_levelsremaining_labelslevel_sizesrT   rU   rV   strideselectorrt   r$   r$   r(   r@      s    
z_Unstacker._make_selectorsro   c                 C  s   t | j S rH   )ro   rt   allrA   r$   r$   r(   mask_all   s   z_Unstacker.mask_all2tuple[npt.NDArray[np.intp], npt.NDArray[np.bool_]]c                 C  s6   t jt| jt jd}| j|dd\}}||dfS )Nri   r   
fill_valuer   )r:   rv   rJ   r    intpget_new_valuesany)rA   	dummy_arr
new_valuesrt   r$   r$   r(   arange_result   s   z_Unstacker.arange_resultr   c                 C  sn   |j dkr|d d tjf }|d u r|jd dkrtd| ||\}}| |}| j}| j||||j	dS )Nr#   z-must pass column labels for multi-column data)r    columnsrj   )
ndimr:   newaxisshaperr   r   get_new_columns	new_indexr+   rj   )rA   ra   value_columnsr   rf   r   r    r$   r$   r(   
get_result   s   


z_Unstacker.get_resultc                 C  s  |j dkr|d d tjf }| |}| j\}}|jd }|| }||f}| j}	| j}
|
rJt|rJ|	|||
dd	|}tj|td}||fS |j}|
rZ|j}tj||d}n*t|trq| }|j||d}||d d < nt||\}}tj||d}|| |j}tj|td}t|jr|d}|d}n|j|dd}t||	d|||||d t|jr|d}t|}||j}||fS )	Nr#      ri   i8F)copyu1zM8[ns])r   r:   r   rh   rl   r   rt   r   rJ   reshapeswapaxesonesro   rj   empty
isinstancer   construct_array_type_emptyr	   fillnamerm   r   viewastype
libreshapeunstackr   )rA   ra   r   rg   lengthwidthr{   result_widthresult_shapert   r   r   new_maskrj   clsr   r$   r$   r(   r      s^   









z_Unstacker.get_new_valuesr   Index | Nonec           	        s   |d u r!| j dkr| jj| jdS | jjd| jjd}|| jS t| j| j  }t|}t	t
|| t|trT|j| jf }|j| jf } fdd|jD }n|| jg}|j| jg} g}| j}|t|| t|||ddS )Nr   r   )itemc                   rZ   r$   r[   r&   lab
propagatorr$   r(   r)   A  r_   z._Unstacker.get_new_columns.<locals>.<listcomp>Fr2   r/   r4   verify_integrity)r0   r8   _renamer7   insert	_na_valuerenamerJ   r:   repeatrv   r   r   r2   r9   r4   r/   r   	_repeaterappendtile)	rA   r   levr{   r   rx   	new_names	new_codesrepeaterr$   r   r(   r   /  s,   

z_Unstacker.get_new_columnsc                 C  s^   t | jt | jkr| j| j}| jrt|dd}|S t | j| j }t|| j }|S )Nr   r   )rJ   r9   r8   get_indexerr0   r:   r   rv   )rA   r   r{   r$   r$   r(   r   R  s   z_Unstacker._repeaterc                   s    fdd j d d D }t jdkr9 jd |d }}|dk r.|t||j}|| jd S t	 j| jddS )Nc                   s   g | ]}|  jqS r$   )r\   rw   r   r~   r$   r(   r)   d      z(_Unstacker.new_index.<locals>.<listcomp>r   r#   r   Fr   )
r`   rJ   r3   r   r   r   r\   r   r5   r   )rA   result_codesr.   level_codesr$   r~   r(   r   a  s   z_Unstacker.new_index)r   N)r    r   r!   r"   )r!   rF   )r!   rY   )ra   rb   r!   rb   )r!   ro   )r!   r   )r!   r   rH   )r   r   )r!   rb   )r!   r   )__name__
__module____qualname____doc__rE   r   rX   r`   rh   r@   r   r   r   r   r   r   r   r$   r$   r$   r(   r   3   s(    -*



P#r   c                   sx  t  dkr| S | j jv r g fdd D   fddtjD }fdd D }fdd D }fdd D }fdd|D }fd	d|D }fd
d|D }	tdd |D }
t||
ddd}t|dd\}}t|||
|dd}|st	|dd}nt
||g ||g |	dg dd}t| tr|  }||_|jd|d}|}|}|}npt| jt
r| }tt  D ]} | |j|d}fdd D  q|S | jdd}||_|jd|d}t|tr|j}n|j}t|t
sJ |jd g| }| jjg| }|jd g}|D ]}|||jd  qt
|||dd}t|tr7||_|S ||_|S )Nr   c                   s   g | ]}  |qS r$   )r-   r&   ir    r$   r(   r)     r_   z%_unstack_multiple.<locals>.<listcomp>c                   s   g | ]}| vr|qS r$   r$   r   )clocsr$   r(   r)     r   c                      g | ]} j | qS r$   r2   r   r   r$   r(   r)     r_   c                   r   r$   r/   r   r   r$   r(   r)     r_   c                   r   r$   r4   r   r   r$   r(   r)     r_   c                   r   r$   r   r   r   r$   r(   r)     r_   c                   r   r$   r   r   r   r$   r(   r)     r_   c                   r   r$   r   r   r   r$   r(   r)     r_   c                 s  rG   rH   rI   rK   r$   r$   r(   rM     rN   z$_unstack_multiple.<locals>.<genexpr>F)sortxnull)r   )r   __placeholder__r   r   r   c                   s    g | ]}| k r
|n|d  qS )r#   r$   )r&   rP   )valr$   r(   r)          deepr   )rJ   r    r4   rangenlevelsrO   r   r   r   r   r   r   r   r   r   r   r2   r   r/   r   r\   )datar   r   rlocsclevelsccodescnamesrlevelsrcodesrnamesr   rs   comp_idsrU   recons_codesdummy_indexdummy	unstackedrx   r   r   resultr   unstcolsrecnew_columnsr$   )r   r    r   r(   _unstack_multipleu  sv   


r   objSeries | DataFramec                 C  s   t |ttfrt|dkrt| ||dS |d }t|s&|dks&| j|}t | tr?t | jt	r8t
| ||dS | jjddS t | jt	sPtdt| j dt| jr[t| ||S t| j|| jd	}|j| jd |d
S )Nr#   r   r   r   Fdropnaz'index must be a MultiIndex to unstack, z was passedr.   r+   r   r   )r   rO   r1   rJ   r   r   r    r-   r   r   _unstack_frameTstackrr   typer   rj   _unstack_extension_seriesr   _constructor_expanddimr   _values)r   r.   r   	unstackerr$   r$   r(   r     s,   


r   r   c                 C  sV   t | jtsJ t| j|| jd}| js!| jj||d}| |S |j| j	| j
|dS )Nr   r   r   )r   r    r   r   _constructor_can_fast_transpose_mgrr   r   r   r   )r   r.   r   r   mgrr$   r$   r(   r     s   

r   seriesr   r!   c                 C  s(   |   }|j||d}|jd|_|S )a'  
    Unstack an ExtensionArray-backed Series.

    The ExtensionDtype is preserved.

    Parameters
    ----------
    series : Series
        A Series with an ExtensionArray for values
    level : Any
        The level name or number.
    fill_value : Any
        The user-level (not physical storage) fill value to use for
        missing values introduced by the reshape. Passed to
        ``series.values.take``.

    Returns
    -------
    DataFrame
        Each column of the DataFrame will have the same dtype as
        the input Series.
    )r.   r   r   )to_framer   r   	droplevel)r   r.   r   dfr   r$   r$   r(   r     s   r   r   Tframer   ro   c                   s  dd }| j \} | j|}t| jtrt| ||dS t| jtr`t| jj} fdd| jj	D }|| j\}}	|
| |
t|	|  t| jj}
|

| jj t|||
dd}n*tt|| j| jf \}\}}	| t|	| f}t||| jj| jjgdd}| js| jrt| jj}|d }t|r| }|d	d |  D }t|| }n| j }n| j }|rt|}|| }|| }| j||d
S )z
    Convert DataFrame to Series with multi-level Index. Columns become the
    second level of the resulting hierarchical index

    Returns
    -------
    stacked : Series or DataFrame
    c                 S  s,   | j r| tt| fS t| \}}||fS rH   )	is_uniquer:   rv   rJ   r   )r    r/   
categoriesr$   r$   r(   	factorize)  s   zstack.<locals>.factorize)	level_numr   c                   rZ   r$   r   r   Kr$   r(   r)   8  r_   zstack.<locals>.<listcomp>Fr   r   c                 S     g | ]\}}|j qS r$   r   )r&   rf   colr$   r$   r(   r)   V  r_   r   )r   r   r-   r   r   _stack_multi_columnsr    r1   r2   r/   r   r:   r   ravelr4   r   zipmapr   r   _is_homogeneous_typedtypesr   r   r   _concat_same_typeitems"_reorder_for_extension_array_stackr   _constructor_sliced)r   r.   r   r  Nr  rx   r   clevclabr   r   r2   ilabr/   r  rj   arrr   rt   r$   r  r(   r     sP   



r   c                   s   t  fdd|D r }|D ]	}t|||d}q|S t dd |D r_ } fdd|D }tt|D ]'}|| }t|||d}g }|D ]}||krT||d  qF|| qF|}q5|S td)	Nc                 3  s    | ]	}| j jv V  qd S rH   )r   r4   r&   r   r   r$   r(   rM   l  s    z!stack_multiple.<locals>.<genexpr>r   c                 s  s    | ]}t |tV  qd S rH   )r   intr  r$   r$   r(   rM   r  s    c                   s   g | ]} j |qS r$   )r   r-   r  r  r$   r(   r)   x  r   z"stack_multiple.<locals>.<listcomp>r#   zTlevel should contain all level names or all level numbers, not a mixture of the two.)r}   r   r   rJ   r   rr   )r   r.   r   r   r   r    updated_levelotherr$   r  r(   stack_multiplei  s*    r  r   r   c                 C  s   t | jdkr| jd j| jd dS dd t| jdd | jdd D }t| }dd	 t|D }t| }tj	d
d t|| jD | jdd dS )zBCreates a MultiIndex from the first N-1 levels of this MultiIndex.r   r   r   c                   s"   g | ]\ } fd d|D qS )c                   s    g | ]}|d kr | ndqS )r   Nr$   )r&   cr   r$   r(   r)     r   z8_stack_multi_column_index.<locals>.<listcomp>.<listcomp>r$   )r&   r/   r$   r  r(   r)     s    z-_stack_multi_column_index.<locals>.<listcomp>Nr   c                 s  s    | ]\}}|V  qd S rH   r$   )r&   keyrf   r$   r$   r(   rM     rN   z,_stack_multi_column_index.<locals>.<genexpr>c                 S  s*   g | ]\}}d |vrt ||jdn|qS )Nri   )r   rj   )r&   new_levr   r$   r$   r(   r)     s    r   )
rJ   r2   r   r4   r  r/   	itertoolsgroupbyr   from_arrays)r   rQ   tuplesunique_tuplesnew_levsr$   r$   r(   _stack_multi_column_index  s   
r(  r  r  c           #   	     s^  ddd}| j dd}|j}t|tsJ ||jd	 kr@|}t||jd	 D ]}|||}||d	 |}	|||	}q&| |_}| sS|d
|}
|j|
d	d}|j}t	t|}t
|}i }|jd }tt|jd }|t|d }t||}t| g }|D ]}z|j|}W n ty   || Y qw t|tst|}n|j|j }| kr|jd d |j| f }||jjd |_|j|dj}n\| jrt| jjd
 r||j|  jjd
 }||j|  }|  !dd |" D }|j#\}}t$|| %||j&' }||}n| j(r ||j|  j}n	|jd d |f }|j)d	kr3|' }|||< qt|d
krD|*|}t|}t|j+trgt,|j+j}t,|j+j-} fdd|j+jD }nt.|j+\}} | g}|/ g}|j+j0g}|| |t1|| || jj-|  t|||dd}!| j2||!|d}"|r|"j3d
dd}"|"S )Nr  r  r   r   c                 S  s   | |j v r
|j |  S | S )z
        Logic for converting the level number to something we can safely pass
        to swaplevel.

        If `level_num` matches a column name return the name from
        position `level_num`, otherwise return `level_num`.
        r   )r  r   r$   r$   r(   _convert_level_number  s   

z3_stack_multi_columns.<locals>._convert_level_numberFr   r#   r   )r.   rc   r   )r   c                 S  r  r$   r  )r&   rf   rL   r$   r$   r(   r)     r_   z(_stack_multi_columns.<locals>.<listcomp>c                   rZ   r$   r  r   levsizer$   r(   r)     r_   r   )r    r   r}   )rc   how)r  r  r   r   )4r   r   r   r   r   r   	swaplevel_is_lexsorted
sort_indexr   r(  r2   sortedsetr/   r   rJ   r:   r\   get_locKeyErrorr   slicestopstartlocreindexra   r  r   r  ilocr   r  r  r   rv   r   r   r
  _is_mixed_typer   
differencer    r1   r4   r   r   r   r   r   r   )#r   r  r   r)  thismi_colsroll_columnsr   lev1lev2level_to_sortr   new_data
level_valsr   level_vals_nanlevel_vals_used	drop_colsr   r7  	slice_lenchunkvalue_slicerj   subsetr  r  idxrx   r   r   	old_codes
old_levelsr   r   r$   r*  r(   r	    s   















r	  r  r   n_rows	n_columnsc                 C  s&   t || ||j }| |S )a  
    Re-orders the values when stacking multiple extension-arrays.

    The indirect stacking method used for EAs requires a followup
    take to get the order correct.

    Parameters
    ----------
    arr : ExtensionArray
    n_rows, n_columns : int
        The number of rows and columns in the original DataFrame.

    Returns
    -------
    taken : ExtensionArray
        The original `arr` with elements re-ordered appropriately

    Examples
    --------
    >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
    >>> _reorder_for_extension_array_stack(arr, 2, 3)
    array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1')

    >>> _reorder_for_extension_array_stack(arr, 3, 2)
    array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1')
    )r:   rv   r   r   r
  r\   )r  rN  rO  rK  r$   r$   r(   r  0  s   !
r  rH   )r   r   )r   r   )r   r   r!   r   )r   T)r   r   r   ro   )T)r   r   r!   r   )r   r   r  r  r   ro   r!   r   )r  r   rN  r  rO  r  r!   r   )D
__future__r   r"  typingr   r   r>   numpyr:   pandas._libs.reshape_libsr   r   pandas._typingr   pandas.errorsr   pandas.util._decoratorsr   pandas.util._exceptionsr   pandas.core.dtypes.castr	   pandas.core.dtypes.commonr
   r   r   r   r   pandas.core.dtypes.dtypesr   pandas.core.dtypes.missingr   pandas.core.algorithmscore
algorithmsrd   pandas.core.arrays.categoricalr   pandas.core.constructionr   pandas.core.framer   pandas.core.indexes.apir   r   pandas.core.seriesr   pandas.core.sortingr   r   r   r   r   pandas.core.arraysr   pandas.core.indexes.frozenr   r   r   r   r   r   r   r  r(  r	  r  r$   r$   r$   r(   <module>   sL      
DW%
!
J
) 