o
    5c                    @  s  d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZmZ ddlZddlZddlmZmZmZ dd	lmZmZm Z m!Z! dd
l"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 ddl2m3Z3m4Z4 ddl5m6Z6m7Z7 ddl8m9Z9m:Z:m;Z;m<Z< ddl=m>Z> ddl?m@  mAZB ddlCmDZD ddlEmFZF ddlGmHZH ddlImJZJmKZKmLZLmMZMmNZN ddlOmPZP ddlQmRZRmSZSmTZT ddlUmVZV ddlWmXZX ddlYmZZZ ddl[m\Z\ ddl]m^Z^ e
rddl_m`Z` eeaedef f ZbedZcG d d! d!eZdd8d&d'Zed9d*d+ZfefeXeHjgG d,d- d-eJeX ZhefeFeHjiG d.d/ d/eJeF Zjd:d6d7ZkdS );z
Define the SeriesGroupBy and DataFrameGroupBy
classes that hold the groupby interfaces (and some implementations).

These are user facing as the result of the ``df.groupby(...)`` operations,
which here returns a DataFrameGroupBy object.
    )annotations)abc)partial)dedent)TYPE_CHECKINGAnyCallableHashableIterableMapping
NamedTupleSequenceTypeVarUnioncastN)Intervallib	reduction)	ArrayLikeManager	Manager2DSingleManager)SpecificationError)AppenderSubstitutiondoc)find_stack_level)ensure_int64is_boolis_categorical_dtypeis_dict_likeis_integer_dtypeis_interval_dtype	is_scalar)isnanotna)
algorithmsnanops)GroupByApplymaybe_mangle_lambdasreconstruct_funcvalidate_func_kwargs)Categorical)!create_series_with_explicit_dtype)	DataFrame)base)GroupBy_agg_template_apply_docs_transform_template)warn_dropping_nuisance_columns_deprecated)get_grouper)Index
MultiIndexall_indexes_same)CategoricalIndex)Series)_shared_docs)maybe_use_numba)boxplot_frame_groupby)NDFrame.ScalarResultc                   @  s   e Zd ZU ded< ded< dS )NamedAggr	   column	AggScalaraggfuncN)__name__
__module____qualname____annotations__ rH   rH   Q/var/www/html/gps/gps/lib/python3.10/site-packages/pandas/core/groupby/generic.pyr@   m   s   
 r@   namestrklasstype[DataFrame | Series]c                   s0    fdd}t | }|jpd|_ |_t|S )z
    Create a property for a GroupBy subclass to dispatch to DataFrame/Series.

    Parameters
    ----------
    name : str
    klass : {DataFrame, Series}

    Returns
    -------
    property
    c                   
   |   S N)_make_wrapperselfrJ   rH   rI   prop   s   
zgenerate_property.<locals>.prop )getattr__doc__rD   property)rJ   rL   rT   parent_methodrH   rS   rI   generate_propertyr   s
   
rZ   	allowlistfrozenset[str]c                   s    fdd}|S )a  
    Create GroupBy member defs for DataFrame/Series names in a allowlist.

    Parameters
    ----------
    klass : DataFrame or Series class
        class where members are defined.
    allowlist : frozenset[str]
        Set of names of klass methods to be constructed

    Returns
    -------
    class decorator

    Notes
    -----
    Since we don't want to override methods explicitly defined in the
    base class, any such name is skipped.
    c                   s0    D ]}t | |r
qt|}t| || q| S rO   )hasattrrZ   setattr)clsrJ   rT   r[   rL   rH   rI   pinner   s   

z*pin_allowlisted_properties.<locals>.pinnerrH   )rL   r[   ra   rH   r`   rI   pin_allowlisted_properties   s   rb   c                      s^  e Zd ZejZdPddZdQdd	ZdRddZe	dZ
eed jded ddS fddZeee
dddTdddddZeZdUddZdVddZ	 	 dWdXd(d)Zd*d+ Zedd,eedddd-d.Z	0dYdZd6d7Zd[d:d;Zd\d]d=d>Zd\d^d?d@Zeej fdAdBZ	 	/	 		/d_d`dFdGZ eej!dadbdLdMZ!eej"dadbdNdOZ"  Z#S )cSeriesGroupBymgrr   returnr:   c                 C  sF   |j dkrtt|}|}n
tt|}|d}| jj|| jjd}|S )N   r   rS   )ndimr   r   r   igetobj_constructorrJ   )rR   rd   singleserrH   rH   rI   _wrap_agged_manager   s   



z!SeriesGroupBy._wrap_agged_managerr   c                 C  s   | j }|j}|S rO   )_obj_with_exclusions_mgr)rR   rl   rk   rH   rH   rI   _get_data_to_aggregate   s   z$SeriesGroupBy._get_data_to_aggregateIterable[Series]c                 c  s    | j V  d S rO   )_selected_objrQ   rH   rH   rI   _iterate_slices   s   zSeriesGroupBy._iterate_slicesa  
    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4])

    >>> s
    0    1
    1    2
    2    3
    3    4
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).min()
    1    1
    2    3
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).agg('min')
    1    1
    2    3
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).agg(['min', 'max'])
       min  max
    1    1    2
    2    3    4

    The output column names can be controlled by passing
    the desired column names and aggregations as keyword arguments.

    >>> s.groupby([1, 1, 2, 2]).agg(
    ...     minimum='min',
    ...     maximum='max',
    ... )
       minimum  maximum
    1        1        2
    2        3        4

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the aggregating function.

    >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min())
    1    1.0
    2    3.0
    dtype: float64
    templateseriesseries_examples)inputexamplesc                   s   t  j|g|R i |S rO   )superapply)rR   funcargskwargs	__class__rH   rI   rz      s   zSeriesGroupBy.applyrx   rL   Nengineengine_kwargsc                O  s  t |r<|   | j}W d    n1 sw   Y  | j| |g|R d|i|}| jj}| jj|	 ||j
dS |d u }	d }
|	rLt|\}
}i }t|tr[t| ||i |S t|tjrwt|}| |}|	ru|
d usrJ |
|_|S t|}|r|s|st| | S | jjdkr| j|g|R i |S z| j|g|R i |W S  ty   | j|g|R i |}| jj}t||td Y S w )Nr   indexrJ   rf   )r   dtype_if_empty)r<   _group_selection_contextrr   _aggregate_with_numbato_framegrouperresult_indexri   rj   ravelrJ   r+   
isinstancerK   rV   r   r
   r)   _aggregate_multiple_funcscolumnscomget_cython_funcnkeys_python_agg_generalKeyError_aggregate_namedr-   object)rR   r{   r   r   r|   r}   dataresultr   
relabelingr   retcyfuncrH   rH   rI   	aggregate   sV   




zSeriesGroupBy.aggregater.   c                 C  s,  t |tr	tdtdd |D r dd |D }tt| }ng }|D ]}|t|p.| q$t||}i }t	|D ]\}\}}t
j||d}| |||< q<tdd | D rtdd	lm}	 |	| d
dd | D d}
|
S dd | D }| jj|d d}tdd |D |_| |}|S )Nznested renamer is not supportedc                 s  s    | ]
}t |ttfV  qd S rO   r   tuplelist.0xrH   rH   rI   	<genexpr>?  s    z:SeriesGroupBy._aggregate_multiple_funcs.<locals>.<genexpr>c                 S  s&   g | ]}t |ttfs||fn|qS rH   r   r   rH   rH   rI   
<listcomp>@  s   & z;SeriesGroupBy._aggregate_multiple_funcs.<locals>.<listcomp>)labelpositionc                 s  s    | ]}t |tV  qd S rO   )r   r.   r   rH   rH   rI   r   R  s    r   concatrf   c                 S     g | ]}|j qS rH   r   r   keyrH   rH   rI   r   V      )axiskeysc                 S     i | ]\}}|j |qS rH   r   r   r   valrH   rH   rI   
<dictcomp>Z      z;SeriesGroupBy._aggregate_multiple_funcs.<locals>.<dictcomp>r   c                 s      | ]}|j V  qd S rO   r   r   rH   rH   rI   r   \      )r   dictr   anynextzipappendr   get_callable_name	enumerater/   	OutputKeyr   valuespandasr   r   itemsri   _constructor_expanddimr6   r   _reindex_output)rR   argr   fresultsidxrJ   r{   r   r   res_dfindexed_outputoutputrH   rH   rI   r   7  s0   


z'SeriesGroupBy._aggregate_multiple_funcsr   "Mapping[base.OutputKey, ArrayLike]c                 C  s:   t |dksJ tt| }| j|}| jj|_|S )zN
        Wrap the dict result of a GroupBy aggregation into a Series.
        rf   )lenr   iterr   ri   rj   rJ   )rR   r   r   r   rH   rH   rI   _indexed_output_to_ndframea  s
   
z(SeriesGroupBy._indexed_output_to_ndframeFr   r   	list[Any]not_indexed_samebooloverride_group_keysDataFrame | Seriesc           	      C  s   t |dkr| jjg | jj| jj|jdS |dusJ t|d trA| jj}| jj	||d}| 
|}|j| jd}| jj|_|S t|d ttfrY| j|||d}| jj|_|S | jj|| jj| jjd}| 
|S )a  
        Wrap the output of SeriesGroupBy.apply into the expected result.

        Parameters
        ----------
        data : Series
            Input data for groupby operation.
        values : List[Any]
            Applied output for each group.
        not_indexed_same : bool, default False
            Whether the applied outputs are not indexed the same as the group axes.

        Returns
        -------
        DataFrame or Series
        r   )rJ   r   dtypeNr   dropnar   r   )r   r   rJ   )r   ri   rj   rJ   r   r   r   r   r   r   r   stackobservedr:   r.   _concat_objects)	rR   r   r   r   r   r   r   res_serr   rH   rH   rI   _wrap_applied_outputm  s6   



z"SeriesGroupBy._wrap_applied_outputc           	      O  sf   i }d}| D ]*\}}t |d| ||g|R i |}t|}|s,t||j d}|||< q|S )NFrJ   T)r   __setattr__libreductionextract_resultcheck_result_arrayr   )	rR   r{   r|   r}   r   initializedrJ   groupr   rH   rH   rI   r     s   

zSeriesGroupBy._aggregate_namedrL   c                O     | j |g|R ||d|S Nr   
_transformrR   r{   r   r   r|   r}   rH   rH   rI   	transform     zSeriesGroupBy.transformTr   howrK   numeric_onlyr   intc              
   K  sz   |dksJ | j }z| jjd|j||fi |}W n ty1 } zt| d|j d|d }~ww |j|| jj	|j
dS )Nr   r   z is not supported for z dtyper   )rr   r   _cython_operation_valuesNotImplementedError	TypeErrorr   rj   ri   r   rJ   )rR   r   r   r   r}   ri   r   errrH   rH   rI   _cython_transform  s   

zSeriesGroupBy._cython_transformr{   r   c                 O  s   t |sJ t| j}g }| jj| j| jdD ] \}}t|d| ||g|R i |}|	|||j
d q|rJddlm}	 |	|}
| |
}n| jjtjd}| jj|_|S )z2
        Transform with a callable func`.
        r   rJ   r   r   r   r   )callabletyperi   r   get_iteratorrr   r   r   r   r   r   pandas.core.reshape.concatr   _set_result_index_orderedrj   npfloat64rJ   )rR   r{   r|   r}   rL   r   rJ   r   resr   concatenatedr   rH   rH   rI   _transform_general  s    

z SeriesGroupBy._transform_generalr   c              
     s   t tr fddn fdddfddzfdd	D }W n ttfy< } ztd
|d}~ww ||}|S )ao  
        Return a copy of a Series excluding elements from groups that
        do not satisfy the boolean criterion specified by func.

        Parameters
        ----------
        func : function
            To apply to each group. Should return True or False.
        dropna : Drop groups that do not pass the filter. True by default;
            if False, groups that evaluate False are filled with NaNs.

        Notes
        -----
        Functions that mutate the passed object can produce unexpected
        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
        ...                           'foo', 'bar'],
        ...                    'B' : [1, 2, 3, 4, 5, 6],
        ...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
        >>> grouped = df.groupby('A')
        >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.)
        1    2
        3    4
        5    6
        Name: B, dtype: int64

        Returns
        -------
        filtered : Series
        c                      t |  i S rO   rV   r   r|   r{   r}   rH   rI   <lambda>      z&SeriesGroupBy.filter.<locals>.<lambda>c                      | g R i S rO   rH   r  r  rH   rI   r        re   r   c                   s    | }|o	t |S rO   )r%   )r   b)wrapperrH   rI   true_and_notna  s   z,SeriesGroupBy.filter.<locals>.true_and_notnac                   s"   g | ]\}}|r  |qS rH   )
_get_index)r   rJ   r   )rR   r  rH   rI   r     s
    z(SeriesGroupBy.filter.<locals>.<listcomp>z'the filter must return a boolean resultN)re   r   )r   rK   
ValueErrorr   _apply_filter)rR   r{   r   r|   r}   indicesr   filteredrH   )r|   r{   r}   rR   r  r  rI   filter  s   
#

zSeriesGroupBy.filterc              	   C  s  | j j\}}}| jj}tj|dd\}}t||f}|| }|| }tjddt	|dd |dd kd  f }tjd|dd |dd kf }|dk}	|r[d||< d||	< nd||	tjd|	dd f @ < d||< tj
||jddd}
t|r|d dkr|
dd }|t| }n	|
}n|
dd }| j j}t|t|krtjt||
jd	|}}
|
||| < | jj||| jjd
}| j|ddS )z
        Return number of unique elements in the group.

        Returns
        -------
        Series
            Number of unique values within each group.
        Fsortr   rf   Nint64copyr   r   
fill_value)r   
group_infori   r   r&   	factorizer   lexsortr_nonzeroaddreduceatastyper   flatnonzeror   zerosr   rj   rJ   r   )rR   r   ids_r   codessorterr   incmaskoutr   rir   rH   rH   rI   nunique%  s6   	0"
zSeriesGroupBy.nuniquec                   s   t  jdi |S )NrH   )ry   describe)rR   r}   r~   rH   rI   r.  Z  s   zSeriesGroupBy.describe	normalizer  	ascendingc           "        s  ddl m} ddlm} | jj\}}	}	| jj}
| jj| jj	g }t
|
js.|d ur?t|s?| jtj||||d}||j_|S |dk| |
 }}
|d u r^tj|
dd\}}dd	 }n|t|
|dd
}|jj}|j|jjd|jd}dd	 }t|jrtt|}t|j|j|f}nt||f}|| || }}dt|dd  |d d kd  }tj d|f }t!|s|}||t"dd ||t"d dk}tj d|f }t!|
s|}d||< t#ttj |df d }t$tj%tj&'||d| jj(}fdd|D |||g }dd | jj)D |g }|r9|d dk* r+d}n| fdd|D }}|rm|+d}t#tj |t!|f }|re||dk }tj&,||d | }n|}|| }|r|d u r|r~||  n|| }t|r|n| |f}|| |d | }|d< |d ur>tj-t!|dd|d d D ]}tj d|dd  |d d kf O q. t!|d }t%t/|t0t/|g}1 d |d g} ||| ddd\}	}t2|dk|| d}|r"t|r|n| |d f}|| |d | }|d< d!fdd  fdd|d d D }|3|d  t4|||dd}!t5|jrPt6|}| jj7||!| jj	d S )"Nr   )get_join_indexers)cut)r/  r  r0  binsr  Tr  c                 S  s   | | S rO   rH   labr)  rH   rH   rI   r    s    z,SeriesGroupBy.value_counts.<locals>.<lambda>)include_lowest)
allow_fillr  c                 S  s   | | j jd S )Nr  )_multiindexr'  r4  rH   rH   rI   r    s    rf   )repeatsc                      g | ]} |qS rH   rH   r   level_codes)reprH   rI   r     s    z.SeriesGroupBy.value_counts.<locals>.<listcomp>c                 S  r   rH   )group_indexr   pingrH   rH   rI   r     r   Fc                   s   g | ]}|  qS rH   rH   r;  )r*  rH   rI   r     r  floatr   r   left)r  r   	lev_codes
np.ndarrayre   c                   s   t |   S rO   )r   repeat)rC  )diffnbinrH   rI   build_codes  s   z/SeriesGroupBy.value_counts.<locals>.build_codesc                   r:  rH   rH   )r   rC  )rH  rH   rI   r     r  )levelsr'  namesverify_integrityr   )rC  rD  re   rD  )8pandas.core.reshape.merger1  pandas.core.reshape.tiler2  r   r  ri   r   rJ  rJ   r   r   r   iterablerz   r:   value_countsr   r&   r  cat
categoriestaker'  	_na_valuer"   r   r   r  rB  rightr  r  r   slicerF  r   rE  r   r!  reconstructed_codes	groupingsallr"  atr$  sumarangetilecumsumwherer   r7   r!   r   rj   )"rR   r/  r  r0  r3  r   r1  r2  r%  r&  r   rJ  rl   r5  levllablab_intervalr(  	idchangesr   lchangesr)  r+  r'  rI  dmaccrP  r<  ncatrB  rT  mirH   )rH  rF  r*  rG  r=  rI   rO  ^  s   	



& 




*$zSeriesGroupBy.value_counts   firstnkeepc                 C  *   t tj||d}| j}| j||dd}|S N)rk  rl  Tr   )r   r:   nlargestrn   _python_apply_generalrR   rk  rl  r   r   r   rH   rH   rI   rp       zSeriesGroupBy.nlargestc                 C  rm  rn  )r   r:   	nsmallestrn   rq  rr  rH   rH   rI   rt    rs  zSeriesGroupBy.nsmallest)rd   r   re   r:   )re   r   re   rq   )re   r:   rO   re   r.   )r   r   re   r:   FF)
r   r:   r   r   r   r   r   r   re   r   )Tr   )r   rK   r   r   r   r   )r{   r   re   r:   T)r   r   )r   r   re   r:   )FTFNT)
r/  r   r  r   r0  r   r   r   re   r:   )ri  rj  )rk  r   rl  rK   re   r:   )$rD   rE   rF   r/   series_apply_allowlist_apply_allowlistrm   rp   rs   r   _agg_examples_docr   r2   formatrz   r   r1   r   aggr   r   r   r   r   r3   r   r   r  r  r-  r:   r.  rO  rp  rt  __classcell__rH   rH   r~   rI   rc      sX    


24

*:
75 rc   c                      s  e Zd ZejZedZee	edddddddddZ
e
Zded
dZdfddZdfddZ		dgdhddZdiddZejdfdjd$d%Zd&d' Zedd(eedddd)d*Zd+d, Zdkd1d2Zdld4d5Zdmd7d8Zdn fd:d;Zdddod=d>Zdpd@dAZdqdDdEZ drdHdIZ!dsdKdLZ"dtdMdNZ#dudOdPZ$dmdvdRdSZ%ee&dT dUdVdd6ejfdwdXdYZ'ee&dZ dUdVdd6ejfdwd[d\Z(e)Z*			6		6dxdydbdcZ+  Z,S )zDataFrameGroupBya  
    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "A": [1, 1, 2, 2],
    ...         "B": [1, 2, 3, 4],
    ...         "C": [0.362838, 0.227877, 1.267767, -0.562860],
    ...     }
    ... )

    >>> df
       A  B         C
    0  1  1  0.362838
    1  1  2  0.227877
    2  2  3  1.267767
    3  2  4 -0.562860

    The aggregation is for each column.

    >>> df.groupby('A').agg('min')
       B         C
    A
    1  1  0.227877
    2  3 -0.562860

    Multiple aggregations

    >>> df.groupby('A').agg(['min', 'max'])
        B             C
      min max       min       max
    A
    1   1   2  0.227877  0.362838
    2   3   4 -0.562860  1.267767

    Select a column for aggregation

    >>> df.groupby('A').B.agg(['min', 'max'])
       min  max
    A
    1    1    2
    2    3    4

    User-defined function for aggregation

    >>> df.groupby('A').agg(lambda x: sum(x) + 2)
        B	       C
    A
    1	5	2.590715
    2	9	2.704907

    Different aggregations per column

    >>> df.groupby('A').agg({'B': ['min', 'max'], 'C': 'sum'})
        B             C
      min max       sum
    A
    1   1   2  0.590715
    2   3   4  0.704907

    To control the output names with different aggregations per column,
    pandas supports "named aggregation"

    >>> df.groupby("A").agg(
    ...     b_min=pd.NamedAgg(column="B", aggfunc="min"),
    ...     c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
       b_min     c_sum
    A
    1      1  0.590715
    2      3  0.704907

    - The keywords are the *output* column names
    - The values are tuples whose first element is the column to select
      and the second element is the aggregation to apply to that column.
      Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
      ``['column', 'aggfunc']`` to make it clearer what the arguments are.
      As usual, the aggregation can be a callable or a string alias.

    See :ref:`groupby.aggregate.named` for more.

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the aggregating function.

    >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min())
          B
    A
    1   1.0
    2   3.0
    r.   r   Nr   c             
   O  s  t |r8|   | j}W d    n1 sw   Y  | j||g|R d|i|}| jj}| jj|||jdS t	|fi |\}	}}
}t
|}t| |||}| }t|s]|d ur]|S |	ro|d uro|jd d |f }|
|_|d u r| jjdkr| j|g|R i |S |s|r| j|g|R i |}na| jdkr| |}|S t| |gdi d}z| }W n ty } zdt|vr | |}W Y d }~n,d }~ww | j}t|tr| jj |_n|jj|jjtt|jjd |jd|_| js	|  | t!tt"||_#|S )	Nr   r   r   rf   rH   )r|   r}   z
no results)levelr  )$r<   r   rr   r   r   r   ri   rj   r   r*   r)   r(   r}  r    ilocr   r   _aggregate_framer   r  rK   r   r:   rn   r  
_set_namesrJ  r   rangenlevels	droplevelas_index_insert_inaxis_grouper_inplacer6   r   r   )rR   r{   r   r   r|   r}   r   r   r   r   r   orderopgbar   sobjrH   rH   rI   r   n  sh   




zDataFrameGroupBy.aggregatere   rq   c                 c  s`    | j }| jdkr|j}t|tr|j| jvr|V  d S | D ]\}}|| jv r*q |V  q d S Nrf   )rr   r   Tr   r:   rJ   
exclusionsr   )rR   ri   r   r   rH   rH   rI   rs     s   


z DataFrameGroupBy._iterate_slicesc                 O  s   | j jdkr
td| j}i }| jdkr1| j || jD ]\}}||g|R i |}|||< qn| jD ]}| j||d}	||	g|R i |}|||< q4| j j}
|j	d| j  }| j
j|||
d}| jdkrj|j}|S )Nrf   zNumber of keys must be 1r   ri   r  )r   r   AssertionErrorrn   r   r   r  	get_groupr   axesri   rj   r  )rR   r{   r|   r}   ri   r   rJ   r   fresgrp_dfr   other_axr+  rH   rH   rI   r    s&   




z!DataFrameGroupBy._aggregate_framec           
      O  sZ   | j }i }t| |D ]\}\}}|j|g|R i |||< q| j|}	|j|	_|	S rO   )rn   r   _iterate_column_groupbysr   ri   rj   r   )
rR   r{   r|   r}   ri   r   iitemsgbr   rH   rH   rI   _aggregate_item_by_item  s   z(DataFrameGroupBy._aggregate_item_by_itemFr   r   r   r   r   r   c                 C  s   t |dkr| jj| jj|jd}|j|jdd}|S tt	j
| d }|d u r,| j S t|tr9| j|||dS | jr@| jjnd }t|tjtfrT| jj||| jdS t|tsu| jrd| jj||dS | jj|| jgd}| | |S | |||||S )	Nr   r  Fr  r   r   r   )r   )r   ri   rj   r   r   r   r"  dtypesr   r   not_noner   r.   r   r  r   ndarrayr6   _constructor_sliced
_selectionr:   r  _wrap_applied_output_series)rR   r   r   r   r   r   first_not_none	key_indexrH   rH   rI   r     sB   




z%DataFrameGroupBy._wrap_applied_outputlist[Series]r   c                   sV  |  }tddti|  fdd|D }tdd |D }| jrJ| j| j}t|dko3|j	dk}	|	r>| j
|||dS |rJdd	lm}
 |
|S |sT| j
|d
|dS tdd |D }| jdkr|}|j }|jd u rdd |D }t|dkrt|d |_n|j}|}|j}|jtkr| }| jj|||d}| js| | | |S )Nr   c                   s   g | ]
}|d ur
|n qS rO   rH   r   backuprH   rI   r   =  s    z@DataFrameGroupBy._wrap_applied_output_series.<locals>.<listcomp>c                 s  r   rO   r   r   rH   rH   rI   r   ?  r   z?DataFrameGroupBy._wrap_applied_output_series.<locals>.<genexpr>rf   r   r   r   Tc                 S  s   g | ]}t |qS rH   )r   asarrayr   vrH   rH   rI   r   f  r	  c                 S  s   h | ]}|j qS rH   rS   r  rH   rH   rI   	<setcomp>m  r   z?DataFrameGroupBy._wrap_applied_output_series.<locals>.<setcomp>r  rH   )_construct_axes_dictr-   r   r8   squeezerr   	_get_axisr   r   r  r   r   r   r   vstackr   r  rJ   r   r  r   tolistri   rj   r  r  r   )rR   r   r   r  r  r   r}   all_indexed_sameapplied_indexsingular_seriesr   stacked_valuesr   r   rJ  r   rH   r  rI   r  1  sP   






z,DataFrameGroupBy._wrap_applied_output_seriesr   r   rK   r   bool | lib.NoDefaultr   r   c                   s   |dksJ   ||} }t|}|r|jdd}d fdd}|j|d	d
}	|	d|jd  t|	|k rDtt | j	
|	}
jdkrR|
j}
|
S )Nr   Fr  bvaluesr   re   c                   s   j jd|  dfi S )Nr   rf   )r   r   )r  r   r}   rR   rH   rI   arr_func  s
   z4DataFrameGroupBy._cython_transform.<locals>.arr_funcT)ignore_failuresrf   )r  r   re   r   )_resolve_numeric_onlyrp   r   get_numeric_datagrouped_reduceset_axisr  r4   r   ri   rj   r   r  )rR   r   r   r   r}   numeric_only_boolrd   orig_mgr_lenr  res_mgrr   rH   r  rI   r     s   
z"DataFrameGroupBy._cython_transformc              
   O  s  ddl m} g }| j}| jj|| jd}| j|g|R i |\}}	zt|\}
}W n	 ty3   Y nFw t	
|d|
 z| ||	|\}}W n! tyU   | || Y S  tyg } zd}t||d }~ww |jdkryt| j||}|| d}|D ]1\}
}|jdkrq}t	
|d|
 ||}|s|jdkr|j|jsd}t| j||}|| q}|rtjd	tt d
 | jdkr|jn|j}| jdkrdnd}||| jdd}|j||dd}| |S )Nr   r   r   rJ   z3transform must return a scalar value for each groupF   TzIn a future version of pandas, returning a DataFrame in groupby.transform will align with the input's index. Apply `.to_numpy()` to the result in the transform function to keep the current behavior and silence this warning.
stacklevelrf   )r   rK  )r   r  )r   r   rn   r   r   r   _define_pathsr   StopIterationr   r   _choose_pathr   _transform_item_by_itemr  size_wrap_transform_general_frameri   r   rg   r   equalswarningswarnFutureWarningr   r   reindexr   )rR   r{   r|   r}   r   appliedri   gen	fast_path	slow_pathrJ   r   pathr   r   msgemit_alignment_warningconcat_index
other_axisr   rH   rH   rI   r    sb   



	
z#DataFrameGroupBy._transform_generalr   c                O  r   r   r   r   rH   rH   rI   r     r   zDataFrameGroupBy.transformc                   s^   t tr fdd} fdd}||fS  fdd} fdd}||fS )Nc                   r  rO   r  r   r  rH   rI   r    r  z0DataFrameGroupBy._define_paths.<locals>.<lambda>c                      | j  fddjdS )Nc                   r  rO   r  r  r  rH   rI   r    r  BDataFrameGroupBy._define_paths.<locals>.<lambda>.<locals>.<lambda>r   rz   r   r  r|   r{   r}   rR   rH   rI   r        c                   r  rO   rH   r  r  rH   rI   r    r	  c                   r  )Nc                   r  rO   rH   r  r  rH   rI   r    r	  r  r   r  r  r  rH   rI   r    r  )r   rK   )rR   r{   r|   r}   r  r  rH   r  rI   r    s   
zDataFrameGroupBy._define_pathsr  r   r  r   c                 C  s   |}||}| j dkr||fS z||}W n ty     ty)   ||f Y S w t|tr;|j|js:||fS nt|trL|j|jsK||fS n||fS ||rW|}||fS r  )	ngroupsr  	Exceptionr   r.   r   r  r:   r   )rR   r  r  r   r  r   res_fastrH   rH   rI   r    s.   



zDataFrameGroupBy._choose_pathri   c           
   	   C  s   i }g }t | |D ]'\}\}}z	||||< W n ty,   tt| ddd Y qw || q|s9td|j|}| j	j
||jd}	||	_|	S )Nr   F)r   z)Transform function invalid for data typesr   )r   r  r   r   r4   r   r   r   rR  ri   rj   r   )
rR   ri   r  r   indsr  colnamer  r   r   rH   rH   rI   r    s"   

z(DataFrameGroupBy._transform_item_by_itemTc              	   O  s   g }| j }| jj|| jd}|D ]K\}}	t|	d| ||	g|R i |}
z|
 }
W n	 ty5   Y nw t|
sBt	|
rQt
|
rQ|
rPt|
rP|| | qtdt|
j d| ||S )a1  
        Return a copy of a DataFrame excluding filtered elements.

        Elements from groups are filtered if they do not satisfy the
        boolean criterion specified by func.

        Parameters
        ----------
        func : function
            Function to apply to each subframe. Should return True or False.
        dropna : Drop groups that do not pass the filter. True by default;
            If False, groups that evaluate False are filled with NaNs.

        Returns
        -------
        filtered : DataFrame

        Notes
        -----
        Each subframe is endowed the attribute 'name' in case you need to know
        which group you are working on.

        Functions that mutate the passed object can produce unexpected
        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
        ...                           'foo', 'bar'],
        ...                    'B' : [1, 2, 3, 4, 5, 6],
        ...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
        >>> grouped = df.groupby('A')
        >>> grouped.filter(lambda x: x['B'].mean() > 3.)
             A  B    C
        1  bar  2  5.0
        3  bar  4  1.0
        5  bar  6  9.0
        r   rJ   zfilter function returned a z, but expected a scalar bool)rr   r   r   r   r   r   r  AttributeErrorr   r#   r$   r%   r   r  r   r   rD   r  )rR   r{   r   r|   r}   r  ri   r  rJ   r   r   rH   rH   rI   r  5  s&   (zDataFrameGroupBy.filter DataFrameGroupBy | SeriesGroupByc                   sF   | j dkr	tdt|trt|dkrtjdtt d t	 
|S )Nrf   z'Cannot subset columns when using axis=1zmIndexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.r  )r   r  r   r   r   r  r  r  r   ry   __getitem__)rR   r   r~   rH   rI   r  y  s   
zDataFrameGroupBy.__getitem__rg   c                 C  s   |dkr)|du r| j }t|| j| j| j| j| j|| j| j| j| j	| j
| j| jdS |dkrJ|du r6| j | }t|| j| j|| j| j| j	| j
| jd	S td)a  
        sub-classes to define
        return a sliced object

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        r  N)r   r  r   r  	selectionr  r  
group_keysr  r   mutatedr   rf   )r  r   r  r  r  r  r   r   zinvalid ndim for _gotitem)ri   r  r   r   r  r  r  r  r  r  r   r  r   rc   r  )rR   r   rg   subsetrH   rH   rI   _gotitem  sD   
zDataFrameGroupBy._gotitemr   c                 C  s   | j }| jdkr|jjS |jS r  )rn   r   r  ro   )rR   ri   rH   rH   rI   rp     s   
z'DataFrameGroupBy._get_data_to_aggregater   Nonec                 C  s`   |j }tt| jjt| j tdd | jjD D ]\}}}|r-||vr-|d|| qd S )Nc                 S  r   rH   )in_axis)r   grprH   rH   rI   r     r   zCDataFrameGroupBy._insert_inaxis_grouper_inplace.<locals>.<listcomp>r   )r   r   reversedr   rJ  get_group_levelsrW  insert)rR   r   r   rJ   r_  r  rH   rH   rI   r    s   
z/DataFrameGroupBy._insert_inaxis_grouper_inplacer   r   c                 C  sT   dd |  D }tdd |D }|| jd| j j | j|}||_	|S )zQ
        Wrap the dict result of a GroupBy aggregation into a DataFrame.
        c                 S  r   rH   r   r   rH   rH   rI   r     r   z?DataFrameGroupBy._indexed_output_to_ndframe.<locals>.<dictcomp>c                 S  r   rH   r   r   rH   rH   rI   r     r   z?DataFrameGroupBy._indexed_output_to_ndframe.<locals>.<listcomp>rf   )
r   r6   r  rn   r  r   rJ  ri   rj   r   )rR   r   r   r   r   rH   rH   rI   r     s   z+DataFrameGroupBy._indexed_output_to_ndframerd   c                 C  s   | j s-|jd dkr|jd nd}tt|}|d| | j|}| | | }n| j	j
}|d| | j|}| jdkrE|j}| |jddS )Nr   rf   T)datetime)r  shaper6   r  r  ri   rj   r  _consolidater   r   r   r  r   _convert)rR   rd   rowsr   r   rH   rH   rI   rm     s   


z$DataFrameGroupBy._wrap_agged_managerc              	   c  sF    t |jD ]\}}|t|jd d |f || j| j| jdfV  qd S )N)r  r   r  r   )r   r   rc   r  r   r  r   )rR   ri   r  r  rH   rH   rI   r    s   z)DataFrameGroupBy._iterate_column_groupbysc                   sR   ddl m} |j} fdd| |D }t|s"tg || jjdS |||ddS )Nr   r   c                   s   g | ]\}} |qS rH   rH   )r   r&  col_groupbyr{   rH   rI   r      s    z>DataFrameGroupBy._apply_to_column_groupbys.<locals>.<listcomp>r   r   rf   )r   r   )r   r   r   r  r   r.   r   r   )rR   r{   ri   r   r   r   rH   r  rI   _apply_to_column_groupbys  s   
z*DataFrameGroupBy._apply_to_column_groupbysr   c                   s^   | j dkr|  fddS | j}| j fdd|d}| js-ttt||_| 	| |S )a  
        Return DataFrame with counts of unique elements in each position.

        Parameters
        ----------
        dropna : bool, default True
            Don't include NaN in the counts.

        Returns
        -------
        nunique: DataFrame

        Examples
        --------
        >>> df = pd.DataFrame({'id': ['spam', 'egg', 'egg', 'spam',
        ...                           'ham', 'ham'],
        ...                    'value1': [1, 5, 5, 2, 5, 5],
        ...                    'value2': list('abbaxy')})
        >>> df
             id  value1 value2
        0  spam       1      a
        1   egg       5      b
        2   egg       5      b
        3  spam       2      a
        4   ham       5      x
        5   ham       5      y

        >>> df.groupby('id').nunique()
              value1  value2
        id
        egg        1       1
        ham        1       2
        spam       2       1

        Check for rows with the same id but conflicting values:

        >>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any())
             id  value1 value2
        0  spam       1      a
        3  spam       2      a
        4   ham       5      x
        5   ham       5      y
        r   c                   rN   rO   r-  r  r   rH   rI   r  9     
 z*DataFrameGroupBy.nunique.<locals>.<lambda>c                   rN   rO   r  r  r   rH   rI   r  =  r  r  )
r   r   rn   r  r  r6   r  r   r   r  )rR   r   ri   r   rH   r   rI   r-  
  s   
-
zDataFrameGroupBy.nuniqueidxmaxz!True for axis=0, False for axis=1)numeric_only_defaultskipnac                   d   t   |tju r dkrd ndn| fdd}d|_| j|| jdd}| d|| |S )Nr   Fc                      t  2 t dd | jtjdd}|j}|   fdd|D }| j||j	dW  d    S 1 s9w   Y  d S )Nignorez".*numeric_only in DataFrame.argmaxargmaxr   r  r   c                   "   g | ]}|d kr | nt jqS r   r   nanr   r  r   rH   rI   r   e     " z9DataFrameGroupBy.idxmax.<locals>.func.<locals>.<listcomp>r   )
r  catch_warningsfilterwarnings_reducer'   	nanargmaxr   r  r  r   dfr   r  r   r   numeric_only_argr  r   rI   r{   X     

$z%DataFrameGroupBy.idxmax.<locals>.funcr  Tro  r.   _get_axis_numberr   
no_defaultrD   rq  rn   _maybe_warn_numeric_only_deprrR   r   r  r   r{   r   rH   r  rI   r  F     


zDataFrameGroupBy.idxmaxidxminc                   r   )Nr   Fc                   r  )Nr  z".*numeric_only in DataFrame.argminargminr  c                   r  r  r  r	  r   rH   rI   r     r
  z9DataFrameGroupBy.idxmin.<locals>.func.<locals>.<listcomp>r   )
r  r  r  r  r'   	nanargminr   r  r  r   r  r  r   rI   r{     r  z%DataFrameGroupBy.idxmin.<locals>.funcr  Tro  r  r  rH   r  rI   r  o  r  zDataFrameGroupBy.idxminr  Sequence[Hashable] | Noner/  r  r0  c              
     s  j dkr	td F j}dd jjD  tjtr0jj	}| v r+g njg}n>t
jj}	|dur]t
|t
 @ }
|
rNtd|
 d|	 }|r\td| dn|	 fd	d
tjjD }tjj}|D ]}t||j jd|d\}}}|t|j7 }qv|j|jjjd}tt| }tdd |D rdd
 |D }tj|dd
 |D d \}}|j|dd}|rtttjj|jj}|j|j|jjd d}|| }|!d}|rttjj}|j"|dj#|dd}j$r|}n6|rdnd}|j}t%&|j'}||v r,td| d||_	|(tt||_|) }||g |_|}|j*jddW  d   S 1 sXw   Y  dS ) u  
        Return a Series or DataFrame containing counts of unique rows.

        .. versionadded:: 1.4.0

        Parameters
        ----------
        subset : list-like, optional
            Columns to use when counting unique combinations.
        normalize : bool, default False
            Return proportions rather than frequencies.
        sort : bool, default True
            Sort by frequencies.
        ascending : bool, default False
            Sort in ascending order.
        dropna : bool, default True
            Don’t include counts of rows that contain NA values.

        Returns
        -------
        Series or DataFrame
            Series if the groupby as_index is True, otherwise DataFrame.

        See Also
        --------
        Series.value_counts: Equivalent method on Series.
        DataFrame.value_counts: Equivalent method on DataFrame.
        SeriesGroupBy.value_counts: Equivalent method on SeriesGroupBy.

        Notes
        -----
        - If the groupby as_index is True then the returned Series will have a
          MultiIndex with one level per input column.
        - If the groupby as_index is False then the returned DataFrame will have an
          additional column with the value_counts. The column is labelled 'count' or
          'proportion', depending on the ``normalize`` parameter.

        By default, rows that contain any NA values are omitted from
        the result.

        By default, the result will be in descending order so that the
        first element of each group is the most frequently-occurring row.

        Examples
        --------
        >>> df = pd.DataFrame({
        ...    'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
        ...    'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
        ...    'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
        ... })

        >>> df
                gender  education   country
        0       male    low         US
        1       male    medium      FR
        2       female  high        US
        3       male    low         FR
        4       female  high        FR
        5       male    low         FR

        >>> df.groupby('gender').value_counts()
        gender  education  country
        female  high       FR         1
                           US         1
        male    low        FR         2
                           US         1
                medium     FR         1
        dtype: int64

        >>> df.groupby('gender').value_counts(ascending=True)
        gender  education  country
        female  high       FR         1
                           US         1
        male    low        US         1
                medium     FR         1
                low        FR         2
        dtype: int64

        >>> df.groupby('gender').value_counts(normalize=True)
        gender  education  country
        female  high       FR         0.50
                           US         0.50
        male    low        FR         0.50
                           US         0.25
                medium     FR         0.25
        dtype: float64

        >>> df.groupby('gender', as_index=False).value_counts()
           gender education country  count
        0  female      high      FR      1
        1  female      high      US      1
        2    male       low      FR      2
        3    male       low      US      1
        4    male    medium      FR      1

        >>> df.groupby('gender', as_index=False).value_counts(normalize=True)
           gender education country  proportion
        0  female      high      FR        0.50
        1  female      high      US        0.50
        2    male       low      FR        0.50
        3    male       low      US        0.25
        4    male    medium      FR        0.25
        rf   z1DataFrameGroupBy.value_counts only handles axis=0c                 S  s   h | ]}|j r|jqS rH   )r  rJ   r   groupingrH   rH   rI   r    s
    z0DataFrameGroupBy.value_counts.<locals>.<setcomp>NzKeys z0 in subset cannot be in the groupby column keys.z) in subset do not exist in the DataFrame.c                   s4   g | ]\}}| vr|v rj jd d |f qS rO   )rr   r  )r   r   rJ   in_axis_namesrR   	subsettedrH   rI   r   *  s    z1DataFrameGroupBy.value_counts.<locals>.<listcomp>F)r   r   r  r   r   )r  r   r   c                 s  s(    | ]}t |jttfo|j V  qd S rO   )r   grouping_vectorr,   r9   	_observedr  rH   rH   rI   r   H  s    
z0DataFrameGroupBy.value_counts.<locals>.<genexpr>c                 S  r   rH   )r   r?  rH   rH   rI   r   M  r   c                 S  r   rH   rS   r?  rH   rH   rI   r   O  r   )rJ  r   r  )r  r   rZ  g        )r0  )r  sort_remaining
proportioncountzColumn label 'z' is duplicate of result columnrO  )method)+r   r   r   ri   r   rW  r   rr   r:   rJ   setr   r  r   r   r5   r  groupbyr   r   r   r  r   r7   from_product	sortlevelr  r  r   r   r  r  r   fillnasort_values
sort_indexr  r   fill_missing_namesrJ  	set_namesreset_index__finalize__)rR   r  r/  r  r0  r   r  rJ   r   unique_colsclashingdoesnt_existrW  r   r   r&  gbresult_serieslevels_listmulti_indexrI  indexed_group_sizeindex_levelr   r   r   result_framerH   r   rI   rO    s   
o






&zDataFrameGroupBy.value_countsrO   ru  rv  rw  )r   r.   r   r   r   r   r   r   )r   r  r   r   r   r   re   r   )r   rK   r   r  r   r   re   r.   )r  r   r  r   r   r.   )ri   r.   re   r.   rx  )re   r  )rg   r   )re   r   )r   r.   re   r  )r   r   re   r.   )rd   r   re   r.   )ri   r   )ri   r   re   r.   )r   r   re   r.   )r  r   r   r  re   r.   )NFTFT)r  r  r/  r   r  r   r0  r   r   r   re   r   )-rD   rE   rF   r/   dataframe_apply_allowlistrz  r   r{  r   r1   r   r}  rs   r  r  r   r  r   r  r   r  r   r   r3   r   r  r  r  r  r  r  rp   r  r   rm   r  r  r-  r;   r  r  r=   boxplotrO  r~  rH   rH   r~   rI   r    st    ]K



;Q'>

$
D
1





<%%r  ri   r.   r   r   r   re   c                 C  s   ddl m} t|trD|j| jr'||gt|j dd}|j|_|j|_n| jt	
|jt|jdf|j|jd}t|tsBJ |S |S )Nr   r   rf   r   r  )r   r   r   r:   r   is_r   r   rj   r   r\  r   r.   )ri   r   r   r   	res_framerH   rH   rI   r    s   

r  )rJ   rK   rL   rM   )rL   rM   r[   r\   )ri   r.   r   r.   r   r   re   r.   )lrW   
__future__r   collectionsr   	functoolsr   textwrapr   typingr   r   r   r	   r
   r   r   r   r   r   r   r  numpyr   pandas._libsr   r   r   r   pandas._typingr   r   r   r   pandas.errorsr   pandas.util._decoratorsr   r   r   pandas.util._exceptionsr   pandas.core.dtypes.commonr   r   r   r    r!   r"   r#   pandas.core.dtypes.missingr$   r%   pandas.corer&   r'   pandas.core.applyr(   r)   r*   r+   pandas.core.arrays.categoricalr,   pandas.core.commoncorecommonr   pandas.core.constructionr-   pandas.core.framer.   pandas.core.groupbyr/   pandas.core.groupby.groupbyr0   r1   r2   r3   r4   pandas.core.groupby.grouperr5   pandas.core.indexes.apir6   r7   r8   pandas.core.indexes.categoryr9   pandas.core.seriesr:   pandas.core.shared_docsr;   pandas.core.util.numba_r<   pandas.plottingr=   pandas.core.genericr>   rK   rB   r?   r@   rZ   rb   ry  rc   r>  r  r  rH   rH   rH   rI   <module>   sl    4$	


&    
`        z