o
    5cB                  	   @  sJ  U d dl mZ d dlZd dlZd dlmZ d dlZd dlZ	d dl
mZ d dlmZmZmZmZmZ d dlmZmZ ejejejejejdejejejejejdej ej!ej"dej#d	e$iiZ%d
e&d< d=d>ddZ'd=d?ddZ(d@ddZ)dAddZ*dBddZ+dAdd Z,d!d" Z-dAd#d$Z.	 	dCdDd.d/Z/	 dEdFd3d4Z0	d=dGd;d<Z1dS )H    )annotationsN)Any)PandasColumn)BufferColumnColumnNullType	DataFrame	DtypeKind)ArrowCTypes
Endianness)          @   )r   r   r   zdict[DtypeKind, dict[int, Any]]
_NP_DTYPESTreturnpd.DataFramec                 C  s2   t | tjr| S t| dstdt| j|dS )a  
    Build a ``pd.DataFrame`` from any DataFrame supporting the interchange protocol.

    Parameters
    ----------
    df : DataFrameXchg
        Object supporting the interchange protocol, i.e. `__dataframe__` method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pd.DataFrame
    __dataframe__z#`df` does not support __dataframe__)
allow_copy)
isinstancepdr   hasattr
ValueError_from_dataframer   )dfr    r   \/var/www/html/gps/gps/lib/python3.10/site-packages/pandas/core/interchange/from_dataframe.pyfrom_dataframe   s
   
r   r   DataFrameXchgc                 C  s   g }|   D ]}t|}|| q|st|dkrtdt|dkr)|d }n	tj|dddd}| jdd}|dur@||_	|S )	a  
    Build a ``pd.DataFrame`` from the DataFrame interchange object.

    Parameters
    ----------
    df : DataFrameXchg
        Object supporting the interchange protocol, i.e. `__dataframe__` method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pd.DataFrame
       zHTo join chunks a copy is required which is forbidden by allow_copy=Falser   TF)axisignore_indexcopyzpandas.indexN)

get_chunksprotocol_df_chunk_to_pandasappendlenRuntimeErrorr   concatmetadatagetindex)r   r   
pandas_dfschunk	pandas_df	index_objr   r   r   r   8   s   
r   c                 C  s  i }g }|   D ]q}t|tstd| d||v r#td| d| |}|jd }|tjtjtj	tj
fv rBt|\||< }n2|tjkrPt|\||< }n$|tjkr^t|\||< }n|tjkrlt|\||< }ntd| d|| qt|}||jd< |S )z
    Convert interchange protocol chunk to ``pd.DataFrame``.

    Parameters
    ----------
    df : DataFrameXchg

    Returns
    -------
    pd.DataFrame
    zColumn z is not a stringz is not uniquer   z
Data type z not handled yet_INTERCHANGE_PROTOCOL_BUFFERS)column_namesr   strr   get_column_by_namedtyper	   INTUINTFLOATBOOLprimitive_column_to_ndarrayCATEGORICALcategorical_column_to_seriesSTRINGstring_column_to_ndarrayDATETIMEdatetime_column_to_ndarrayNotImplementedErrorr%   r   r   attrs)r   columnsbuffersnamecolr4   bufr.   r   r   r   r$   ]   s6   







r$   rE   r   tuple[np.ndarray, Any]c                 C  s>   |   }|d \}}t||| j| j}t|| |d }||fS )aJ  
    Convert a column holding one of the primitive dtypes to a NumPy array.

    A primitive type is one of: int, uint, float, bool.

    Parameters
    ----------
    col : Column

    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object
        that keeps the memory alive.
    datavalidity)get_buffersbuffer_to_ndarrayoffsetsize	set_nulls)rE   rC   	data_buff
data_dtyperH   r   r   r   r9      s
   r9   tuple[pd.Series, Any]c                 C  s   | j }|d std|d }t|tsJ dt|j}|  }|d \}}t||| j	| j
}||t|  }tj|||d d}	t|	}
t|
| |d }
|
|fS )	a  
    Convert a column holding categorical data to a pandas Series.

    Parameters
    ----------
    col : Column

    Returns
    -------
    tuple
        Tuple of pd.Series holding the data and the memory owner object
        that keeps the memory alive.
    is_dictionaryz-Non-dictionary categoricals not supported yet
categoriesz!categories must be a PandasColumnrH   
is_ordered)rS   orderedrI   )describe_categoricalr@   r   r   nparray_colrJ   rK   rL   rM   r&   r   CategoricalSeriesrN   )rE   categorical
cat_columnrS   rC   
codes_buffcodes_dtypecodesvaluescatrH   r   r   r   r;      s    

r;   c                 C  s  | j \}}|tjtjtjfvrt| d|  }|d s"J d|d \}}|d dks0J |d tjks9J t	j
dtjtjf}t||d| jd	}|d \}}	t||	| j| jd d
}
d}|tjtjfv r|d soJ d|d \}}t||| j| j}|dkr| }dg| j }t| jD ]*}|dur|| rtj||< q||
| |
|d   }t|}|jdd}|||< qtj|dd|fS )a  
    Convert a column holding string data to a NumPy array.

    Parameters
    ----------
    col : Column

    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object
        that keeps the memory alive.
    z3 null kind is not yet supported for string columns.offsetsz#String buffers must contain offsetsrH   r   r      r   )rL   length)re   NrI   z*Validity buffers cannot be empty for maskszutf-8)encodingobjectr4   )describe_nullr   NON_NULLABLEUSE_BITMASKUSE_BYTEMASKr@   rJ   r
   r<   r	   r6   UINT8r   NATIVErK   rM   rL   rangerW   nanbytesdecodeasarray)rE   	null_kindsentinel_valrC   rO   protocol_data_dtyperP   rH   offset_buffoffset_dtyperc   null_pos
valid_buffvalid_dtypestr_listiunits	str_bytesstringr   r   r   r=      sR   


r=   c                 C  s   t d| }|r.|d|d}}|dkrtd|dkr#|d7 }|d| d}|S t d	| }|r^|d}|d
krL|tjd d}|S |dkrW|d}|S td| td|  )z4Parse datetime `format_str` to interpret the `data`.zts([smun]):(.*)r   rd    zTimezones are not supported yetszdatetime64[]ztd([Dm])DiQ zdatetime64[s]mzdatetime64[ms]zDate unit is not supported: z DateTime kind is not supported: )rematchgroupr@   astyperW   uint64)
format_strrH   timestamp_metaunittz	date_metar   r   r   parse_datetime_format_str#  s(   

r   c              	   C  sv   |   }| j\}}}}|d \}}t|tj|d ttd|d  tjf| j	| j
}t||}t|| |d }||fS )a  
    Convert a column holding DateTime data to a NumPy array.

    Parameters
    ----------
    col : Column

    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object
        that keeps the memory alive.
    rH   r   r6   rI   )rJ   r4   rK   r	   r6   getattrr
   r   rn   rL   rM   r   rN   )rE   rC   _r   dbufr4   rH   r   r   r   r?   D  s    
r?   bufferr   r4   tuple[DtypeKind, int, str, str]rL   intre   
int | None
np.ndarrayc                 C  s   |\}}}}t |i |d}|du rtd| dtj|}t| j|| d  t	|}	|dkrQ|dus>J dtjj
|	| jfd}
t|
||d dS tjj
|	| j|d  fdS )	a  
    Build a NumPy array from the passed buffer.

    Parameters
    ----------
    buffer : Buffer
        Buffer to build a NumPy array from.
    dtype : tuple
        Data type of the buffer conforming protocol dtypes format.
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    length : int, optional
        If the buffer is a bit-mask, specifies a number of bits to read
        from the buffer. Has no effect otherwise.

    Returns
    -------
    np.ndarray

    Notes
    -----
    The returned array doesn't own the memory. The caller of this function is
    responsible for keeping the memory owner object alive as long as
    the returned NumPy array is being used.
    NzConversion for  is not yet supported.r   r   z1`length` must be specified for a bit-mask buffer.)shape)first_byte_offset)r   r*   r@   rW   	ctypeslibas_ctypes_typectypescastptrPOINTERas_arraybufsizebitmask_to_bool_ndarray)r   r4   rL   re   kind	bit_widthr   column_dtypectypes_typedata_pointerarrr   r   r   rK   h  s   rK   bitmaskmask_lengthr   c           
      C  s  |d }| |d } |d; }t j|td}| d }d}td| |}t|D ]}|d|| > @ r4d||< |d7 }q&t|| d D ]}	| |	d  }tdD ]}|d|> @ rYd||< |d7 }qMqAt| dkr| d }tt|| D ]}|d|> @ r}d||< |d7 }qq|S )a  
    Convert bit-mask to a boolean NumPy array.

    Parameters
    ----------
    bitmask : np.ndarray[uint8]
        NumPy array of uint8 dtype representing the bitmask.
    mask_length : int
        Number of elements in the mask to interpret.
    first_byte_offset : int, default: 0
        Number of elements to offset from the start of the first byte.

    Returns
    -------
    np.ndarray[bool]
    r   Nrh   r   r   T)rW   zerosboolminro   r&   )
r   r   r   bytes_to_skip	bool_maskvalmask_idxbits_in_first_bytejr}   r   r   r   r     s2   


r   rH   np.ndarray | pd.SeriesrI   5tuple[Buffer, tuple[DtypeKind, int, str, str]] | Noneallow_modify_inplacer   c           	      C  s   |j \}}d}|tjkrt| |k}n4|tjtjfv r7|s"J d|\}}t|||j|j	}|dkr6| }n|tj
tjfv r@ntd| d|durrt|rr|sW|  } zd| |< W | S  tyq   | t} d| |< Y | S w | S )a  
    Set null values for the data according to the column null kind.

    Parameters
    ----------
    data : np.ndarray or pd.Series
        Data to set nulls in.
    col : Column
        Column object that describes the `data`.
    validity : tuple(Buffer, dtype) or None
        The return value of ``col.buffers()``. We do not access the ``col.buffers()``
        here to not take the ownership of the memory of buffer objects.
    allow_modify_inplace : bool, default: True
        Whether to modify the `data` inplace when zero-copy is possible (True) or always
        modify a copy of the `data` (False).

    Returns
    -------
    np.ndarray or pd.Series
        Data with the nulls being set.
    Nz/Expected to have a validity buffer for the maskr   z
Null kind r   )ri   r   USE_SENTINELr   r[   rk   rl   rK   rL   rM   rj   USE_NANr@   rW   anyr"   	TypeErrorr   float)	rH   rE   rI   r   rt   ru   ry   rz   r{   r   r   r   rN     s4   




rN   )T)r   r   )r   r   )r   r   r   r   )rE   r   r   rG   )rE   r   r   rQ   )r   N)
r   r   r4   r   rL   r   re   r   r   r   )r   )r   r   r   r   r   r   r   r   )rH   r   rE   r   rI   r   r   r   )2
__future__r   r   r   typingr   numpyrW   pandasr   pandas.core.interchange.columnr   *pandas.core.interchange.dataframe_protocolr   r   r   r   r   r	   pandas.core.interchange.utilsr
   r   r5   int8int16int32int64r6   uint8uint16uint32r   r7   float32float64r8   r   r   __annotations__r   r   r$   r9   r;   r=   r   r?   rK   r   rN   r   r   r   r   <module>   s:    

%
.

)V
!'8: